first backup of charts
1 code/codeserver Submodule
Submodule code/codeserver added at b59a4f7366
55 code/metadata.yaml Normal file
@@ -0,0 +1,55 @@

application_name: &application_name code

distributed:
  method: helm
  release_name: *application_name
  chart: codeserver/ci/helm-chart
  sets:
    image:
      repository: codercom/code-server
      tag: '4.103.2'
      pullPolicy: IfNotPresent
    resources:
      limits:
        nvidia.com/gpu: 0
    nodeSelector:
      resource-group: gpu_5880
    service:
      type: NodePort
      port: 8080
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    servicename: ~
    port: 30083
    url: ~
  pod:
    name: *application_name
monolithic:
  method: helm
  release_name: *application_name
  chart: codeserver/ci/helm-chart
  sets:
    image:
      repository: codercom/code-server
      tag: '4.103.2'
      pullPolicy: IfNotPresent
    resources:
      limits:
        nvidia.com/gpu: 1
    nodeSelector:
      resource-group: gpu_5880
    service:
      type: NodePort
      port: 8080
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    servicename: ~
    port: 30083
    url: ~
  pod:
    name: *application_name
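For reference, each `sets:` block above reads like a set of Helm value overrides. A minimal sketch of the equivalent manual command for the `distributed:` profile, assuming the deployment tooling (not included in this commit) maps each `sets:` entry onto a `--set` flag:

  helm upgrade --install code codeserver/ci/helm-chart \
    --set image.repository=codercom/code-server \
    --set image.tag=4.103.2 \
    --set 'resources.limits.nvidia\.com/gpu=0' \
    --set nodeSelector.resource-group=gpu_5880 \
    --set service.type=NodePort \
    --set service.port=8080

The `svc:` and `pod:` blocks look like post-deploy connection metadata rather than chart values, so they are left out of the sketch.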
47 edgetts/metadata.yaml Normal file
@@ -0,0 +1,47 @@

application_name: &application_name edgetts

distributed:
  method: helm
  release_name: *application_name
  chart: test-tts
  sets:
    image:
      repository: travisvn/openai-edge-tts
      tag: "latest"
      pullPolicy: IfNotPresent
    service:
      type: NodePort
      port: 5050
      nodePort: 30250
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    servicename: ~
    port: 30250
    url: ~
  pod:
    name: *application_name
monolithic:
  method: helm
  release_name: *application_name
  chart: test-tts
  sets:
    image:
      repository: travisvn/openai-edge-tts
      tag: "latest"
      pullPolicy: IfNotPresent
    service:
      type: NodePort
      port: 5050
      nodePort: 30250
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    servicename: ~
    port: 30250
    url: ~
  pod:
    name: *application_name
23 edgetts/test-tts/.helmignore Normal file
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
24 edgetts/test-tts/Chart.yaml Normal file
@@ -0,0 +1,24 @@
apiVersion: v2
name: test-tts
description: A Helm chart for Kubernetes

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"
22 edgetts/test-tts/templates/NOTES.txt Normal file
@@ -0,0 +1,22 @@
1. Get the application URL by running these commands:
{{- if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
  {{- range .paths }}
  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
  {{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "test-tts.fullname" . }})
  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
  echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
  NOTE: It may take a few minutes for the LoadBalancer IP to be available.
        You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "test-tts.fullname" . }}'
  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "test-tts.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
  echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "test-tts.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
  echo "Visit http://127.0.0.1:8080 to use your application"
  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}
62 edgetts/test-tts/templates/_helpers.tpl Normal file
@@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "test-tts.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "test-tts.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "test-tts.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
*/}}
{{- define "test-tts.labels" -}}
helm.sh/chart: {{ include "test-tts.chart" . }}
{{ include "test-tts.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
*/}}
{{- define "test-tts.selectorLabels" -}}
app.kubernetes.io/name: {{ include "test-tts.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use
*/}}
{{- define "test-tts.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "test-tts.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
78 edgetts/test-tts/templates/deployment.yaml Normal file
@@ -0,0 +1,78 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "test-tts.fullname" . }}
  labels:
    {{- include "test-tts.labels" . | nindent 4 }}
spec:
  {{- if not .Values.autoscaling.enabled }}
  replicas: {{ .Values.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "test-tts.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "test-tts.labels" . | nindent 8 }}
        {{- with .Values.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "test-tts.serviceAccountName" . }}
      {{- with .Values.podSecurityContext }}
      securityContext:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: {{ .Chart.Name }}
          {{- with .Values.securityContext }}
          securityContext:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          ports:
            - name: http
              containerPort: {{ .Values.service.port }}
              protocol: TCP
          {{- with .Values.livenessProbe }}
          livenessProbe:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.readinessProbe }}
          readinessProbe:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.resources }}
          resources:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.volumeMounts }}
          volumeMounts:
            {{- toYaml . | nindent 12 }}
          {{- end }}
      {{- with .Values.volumes }}
      volumes:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
32 edgetts/test-tts/templates/hpa.yaml Normal file
@@ -0,0 +1,32 @@
{{- if .Values.autoscaling.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: {{ include "test-tts.fullname" . }}
  labels:
    {{- include "test-tts.labels" . | nindent 4 }}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: {{ include "test-tts.fullname" . }}
  minReplicas: {{ .Values.autoscaling.minReplicas }}
  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
  metrics:
    {{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
    {{- end }}
    {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
    {{- end }}
{{- end }}
43 edgetts/test-tts/templates/ingress.yaml Normal file
@@ -0,0 +1,43 @@
{{- if .Values.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: {{ include "test-tts.fullname" . }}
  labels:
    {{- include "test-tts.labels" . | nindent 4 }}
  {{- with .Values.ingress.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  {{- with .Values.ingress.className }}
  ingressClassName: {{ . }}
  {{- end }}
  {{- if .Values.ingress.tls }}
  tls:
    {{- range .Values.ingress.tls }}
    - hosts:
        {{- range .hosts }}
        - {{ . | quote }}
        {{- end }}
      secretName: {{ .secretName }}
    {{- end }}
  {{- end }}
  rules:
    {{- range .Values.ingress.hosts }}
    - host: {{ .host | quote }}
      http:
        paths:
          {{- range .paths }}
          - path: {{ .path }}
            {{- with .pathType }}
            pathType: {{ . }}
            {{- end }}
            backend:
              service:
                name: {{ include "test-tts.fullname" $ }}
                port:
                  number: {{ $.Values.service.port }}
          {{- end }}
    {{- end }}
{{- end }}
16 edgetts/test-tts/templates/service.yaml Normal file
@@ -0,0 +1,16 @@
apiVersion: v1
kind: Service
metadata:
  name: {{ include "test-tts.fullname" . }}
  labels:
    {{- include "test-tts.labels" . | nindent 4 }}
spec:
  type: {{ .Values.service.type }}
  ports:
    - port: {{ .Values.service.port }}
      targetPort: http
      protocol: TCP
      name: http
      nodePort: {{ .Values.service.nodePort }}
  selector:
    {{- include "test-tts.selectorLabels" . | nindent 4 }}
13 edgetts/test-tts/templates/serviceaccount.yaml Normal file
@@ -0,0 +1,13 @@
{{- if .Values.serviceAccount.create -}}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "test-tts.serviceAccountName" . }}
  labels:
    {{- include "test-tts.labels" . | nindent 4 }}
  {{- with .Values.serviceAccount.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
{{- end }}
15 edgetts/test-tts/templates/tests/test-connection.yaml Normal file
@@ -0,0 +1,15 @@
apiVersion: v1
kind: Pod
metadata:
  name: "{{ include "test-tts.fullname" . }}-test-connection"
  labels:
    {{- include "test-tts.labels" . | nindent 4 }}
  annotations:
    "helm.sh/hook": test
spec:
  containers:
    - name: wget
      image: busybox
      command: ['wget']
      args: ['{{ include "test-tts.fullname" . }}:{{ .Values.service.port }}']
  restartPolicy: Never
124 edgetts/test-tts/values.yaml Normal file
@@ -0,0 +1,124 @@
# Default values for test-tts.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
replicaCount: 1

# This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/
image:
  repository: travisvn/openai-edge-tts
  # This sets the pull policy for images.
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: "latest"

# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
imagePullSecrets: []
# This is to override the chart name.
nameOverride: "edgetts"
fullnameOverride: ""

# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/
serviceAccount:
  # Specifies whether a service account should be created
  create: true
  # Automatically mount a ServiceAccount's API credentials?
  automount: true
  # Annotations to add to the service account
  annotations: {}
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""

# This is for setting Kubernetes Annotations to a Pod.
# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
podAnnotations: {}
# This is for setting Kubernetes Labels to a Pod.
# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
podLabels: {}

podSecurityContext: {}
  # fsGroup: 2000

securityContext: {}
  # capabilities:
  #   drop:
  #   - ALL
  # readOnlyRootFilesystem: true
  # runAsNonRoot: true
  # runAsUser: 1000

# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/
service:
  # This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
  type: NodePort
  # This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
  port: 5050
  nodePort: 30250

# This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/
ingress:
  enabled: false
  className: ""
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  hosts:
    - host: chart-example.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local

resources: {}
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi

# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
#livenessProbe:
#  httpGet:
#    path: /
#    port: http
#readinessProbe:
#  httpGet:
#    path: /
#    port: http

# This section is for setting up autoscaling more information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/
autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  targetCPUUtilizationPercentage: 80
  # targetMemoryUtilizationPercentage: 80

# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
#   secret:
#     secretName: mysecret
#     optional: false

# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
#   mountPath: "/etc/foo"
#   readOnly: true

nodeSelector: {}

tolerations: []

affinity: {}
23 jarvis/jarvis/.helmignore Normal file
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
24 jarvis/jarvis/Chart.yaml Normal file
@@ -0,0 +1,24 @@
apiVersion: v2
name: jarvis
description: A Helm chart for Kubernetes

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "1.16.0"
22 jarvis/jarvis/templates/NOTES.txt Normal file
@@ -0,0 +1,22 @@
1. Get the application URL by running these commands:
{{- if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
  {{- range .paths }}
  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
  {{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "jarvis.fullname" . }})
  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
  echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
  NOTE: It may take a few minutes for the LoadBalancer IP to be available.
        You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "jarvis.fullname" . }}'
  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "jarvis.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
  echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "jarvis.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
  echo "Visit http://127.0.0.1:8080 to use your application"
  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}
62 jarvis/jarvis/templates/_helpers.tpl Normal file
@@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "jarvis.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "jarvis.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "jarvis.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
*/}}
{{- define "jarvis.labels" -}}
helm.sh/chart: {{ include "jarvis.chart" . }}
{{ include "jarvis.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
*/}}
{{- define "jarvis.selectorLabels" -}}
app.kubernetes.io/name: {{ include "jarvis.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use
*/}}
{{- define "jarvis.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "jarvis.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
17 jarvis/jarvis/templates/images-pv.yaml Normal file
@@ -0,0 +1,17 @@
# pv.yaml
apiVersion: v1
kind: PersistentVolume
metadata:
  name: {{ .Release.Name }}-pv # name of the PV; can be customized
spec:
  storageClassName: local-path # add this line; it must match the StorageClass name above
  capacity:
    storage: 500Gi # capacity of the PV; adjust to the actual size of the NFS share or the expected usage
  accessModes:
    - ReadWriteMany # access mode
  persistentVolumeReclaimPolicy: Retain

  nfs:
    path: /volume1/Dataset/PVStore/lab-data-dataset-pvc-ec4aba12-c683-4168-b335-7b1a8819581a/Private/cache-images # path shared on the NFS server
    server: 10.6.80.11 # IP address or hostname of the NFS server

14 jarvis/jarvis/templates/images-pvc.yaml Normal file
@@ -0,0 +1,14 @@
# pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ .Release.Name }}-pvc # name of the PVC; Pods reference this name
  #namespace: default # namespace of the PVC, usually default or a custom namespace
spec:
  storageClassName: local-path # add this line; it must match the PV and StorageClass name
  accessModes:
    - ReadWriteMany # access mode; must match the PV's accessModes or be compatible with them
  resources:
    requests:
      storage: 50Gi # storage requested by the PVC; must be less than or equal to the PV capacity
  volumeName: {{ .Release.Name }}-pv # explicitly name the PV to bind to; this is the key to manually binding a PV
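The claim above is bound to the volume manually through `volumeName`, so the pair only works when both objects come from the same release. A quick post-install check, with the release name as a placeholder:

  kubectl get pv <release>-pv
  kubectl get pvc <release>-pvc

Both should report STATUS Bound once the claim has matched the volume.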
68 jarvis/jarvis/templates/jarvis-adapter.yaml Normal file
@@ -0,0 +1,68 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: jarvis-adapter-deployment
  # namespace: jarvis-models
  labels:
    app: jarvis-adapter
spec:
  replicas: 1
  selector:
    matchLabels:
      app: jarvis-adapter
  template:
    metadata:
      labels:
        app: jarvis-adapter
    spec:
      #hostNetwork: true
      # --- START: image pull secrets ---
      imagePullSecrets:
        - name: regcred # This MUST match the name of the registry secret created beforehand
      # --- END: image pull secrets ---
      containers:
        - name: jarvis-adapter
          image: {{ .Values.jarvis_adapter.image }}
          imagePullPolicy: IfNotPresent # Pull only if the image is not already present on the node
          env:
            - name: INFERENCE_ENDPOINT
              value: {{ .Values.jarvis_adapter.endpoint }}
          ports:
            - containerPort: 5000 # The port the application listens on inside the container
              #hostPort: 8880 # The port on the host machine (--network=host combined with -p 8880:8080)
              protocol: TCP
          resources:
            requests:
              cpu: 100m # 100 millicores (0.1 CPU)
              memory: 256Mi # 256 mebibytes
            limits:
              cpu: 500m # limit to 500 millicores (0.5 CPU)
              memory: 512Mi # limit to 512 mebibytes
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
---
apiVersion: v1
kind: Service
metadata:
  name: llm-blackbox
  # namespace: jarvis-models
  labels:
    app: jarvis-adapter
spec:
  selector:
    app: jarvis-adapter
  ports:
    - protocol: TCP
      port: 80
      targetPort: 5000
  type: NodePort
85 jarvis/jarvis/templates/jarvis-api.yaml Normal file
@@ -0,0 +1,85 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: jarvis-api-deployment
  # namespace: jarvis-models
  labels:
    app: jarvis-api
spec:
  replicas: 1
  selector:
    matchLabels:
      app: jarvis-api
  template:
    metadata:
      labels:
        app: jarvis-api
    spec:
      #hostNetwork: true
      # --- START: image pull secrets ---
      imagePullSecrets:
        - name: regcred # This MUST match the name of the registry secret created beforehand
      # --- END: image pull secrets ---
      containers:
        - name: jarvis-api
          image: {{ .Values.jarvis_api.image }}
          imagePullPolicy: IfNotPresent # Pull only if the image is not already present on the node
          ports:
            - containerPort: 8080 # The port the application listens on inside the container
              #hostPort: 8880 # The port on the host machine (--network=host combined with -p 8880:8080)
              protocol: TCP
          resources:
            requests:
              cpu: 100m # 100 millicores (0.1 CPU)
              memory: 256Mi # 256 mebibytes
            limits:
              cpu: 500m # limit to 500 millicores (0.5 CPU)
              memory: 512Mi # limit to 512 mebibytes
          volumeMounts:
            - name: env-config-volume
              mountPath: /.env.yml
              subPath: .env.yml
              readOnly: true
            - name: images-data
              mountPath: /images
      volumes:
        - name: env-config-volume
          #hostPath:
          #  path: /Workspace/HDD1/code/jarvis-api/.env.yml # Host path for .env.yml
          #  type: FileOrCreate
          configMap:
            name: {{ .Release.Name }}-cm # This MUST match the name of the ConfigMap created by this chart
            items:
              - key: .api.env.yml # This is the key defined in the ConfigMap's data section
                path: .env.yml # This is the filename inside the mountPath (e.g., /.env.yml)
        - name: images-data
          persistentVolumeClaim:
            claimName: {{ .Release.Name }}-pvc
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
---
apiVersion: v1
kind: Service
metadata:
  name: {{ .Release.Name }}-api-service
  # namespace: jarvis-models
  labels:
    app: jarvis-api
spec:
  selector:
    app: jarvis-api
  ports:
    - protocol: TCP
      port: 8080
      targetPort: 8080
  type: NodePort
183 jarvis/jarvis/templates/jarvis-configmap.yaml Normal file
@@ -0,0 +1,183 @@
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ .Release.Name }}-cm # Name of the ConfigMap
  # namespace: jarvis-models # Ensure this matches the Deployment's namespace
data:
  .api.env.yml: | # The key here will be the filename in the pod (e.g., /.env.yml)
    database:
      host: 10.6.14.130
      port: 3306
      username: jarvis
      password: boardwarejarvis
      database: jarvis
    jwt:
      secret: secretkey
    users:
      - username: jarvis
        password: boardwarejarvis
      - username: user
        password: boardwareuser
      - username: g2e
        password: g2e
      - username: vera
        password: vera
      - username: ivan
        password: ivan
    blackbox:
      mode: 0 # 0: /?blackbox=models, 1: /models-blackbox
      url: http://jarvis-model-service
      port: 8080

    vad:
      url: http://vad-blackbox
    asr:
      url: http://asr-blackbox
    llm:
      url: http://llm-blackbox
    vlm:
      url: http://vlm-blackbox
    tts:
      url: http://tts-blackbox
    chatllama:
      url: http://chatllama-blackbox
    chroma:
      upsert_url: http://chroma-blackbox/upsert


    env: dev
    authentik:
      redirectUri: http://10.6.14.130:4200
      baseUrl: https://authentik.universalmacro.com
      clientId: xxx
      clientSecret: xxx
    server:
      port: 8080

    # log:
    #   path: "/Workspace/Logging/logtime.out"

    log:
      loki:
        # url: "https://103.192.46.20:27002/laas/1868865592451137536/loki/api/v1/push"
        url: "https://loki.bwgdi.com/loki/api/v1/push"
        x-odin-auth: "log_m7uxtqtru2318hbaoonf9wgjy8chcnebhwhl0wncsvfctu2ppn9m53q6p3i3"
        labels:
          app: jarvis
          env: dev
          location: "k3s_gdi"

    model:
      tts:
        url: http://10.6.14.130:8000/?blackbox_name=tts
        tts_model_name: melotts
        tts_stream: false
      streaming:
        url: http://10.6.14.130:8000/?blackbox_name=chat
      vlms_url: http://10.6.14.130:8000/?blackbox_name=vlms
  .models.env.yml: | # The key here will be the filename in the pod (e.g., /.env.yml)
    env:
      version: 0.0.1
      host: 0.0.0.0
      port: 8000

    log:
      level: debug
      time_format: "%Y-%m-%d %H:%M:%S"
      filename: "./jarvis-models.log"

      loki:
        url: "https://loki.bwgdi.com/loki/api/v1/push"
        labels:
          app: jarvis
          env: dev
          location: "k3s_gdi"
          layer: models

    melotts:
      mode: local # or docker
      url: http://10.6.44.141:18080/convert/tts
      speed: 0.9
      device: 'cuda:0'
      language: 'ZH'
      speaker: 'ZH'

    cosyvoicetts:
      mode: local # or docker
      url: http://10.6.44.141:18080/convert/tts
      speed: 0.9
      device: 'cuda:0'
      language: '粤语女'
      speaker: 'ZH'

    sovitstts:
      mode: docker
      url: http://10.6.80.90:9880/tts
      speed: 0.9
      device: 'cuda:0'
      language: 'ZH'
      speaker: 'ZH'
      text_lang: "yue"
      ref_audio_path: "output/slicer_opt/Ricky-Wong/Ricky-Wong-3-Mins.wav_0006003840_0006134080.wav"
      prompt_lang: "yue"
      prompt_text: "你失敗咗點算啊?你而家安安穩穩,點解要咁樣做呢?"
      text_split_method: "cut5"
      batch_size: 1
      media_type: "wav"
      streaming_mode: True

    sensevoiceasr:
      mode: local # or docker
      url: http://10.6.44.141:18080/convert/tts
      speed: 0.9
      device: 'cuda:0'
      language: '粤语女'
      speaker: 'ZH'

    tesou:
      url: http://120.196.116.194:48891/chat/

    TokenIDConverter:
      token_path: src/asr/resources/models/token_list.pkl
      unk_symbol: <unk>

    CharTokenizer:
      symbol_value:
      space_symbol: <space>
      remove_non_linguistic_symbols: false

    WavFrontend:
      cmvn_file: src/asr/resources/models/am.mvn
      frontend_conf:
        fs: 16000
        window: hamming
        n_mels: 80
        frame_length: 25
        frame_shift: 10
        lfr_m: 7
        lfr_n: 6
        filter_length_max: -.inf
        dither: 0.0

    Model:
      model_path: src/asr/resources/models/model.onnx
      use_cuda: false
      CUDAExecutionProvider:
        device_id: 0
        arena_extend_strategy: kNextPowerOfTwo
        cudnn_conv_algo_search: EXHAUSTIVE
        do_copy_in_default_stream: true
      batch_size: 3
    blackbox:
      lazyloading: true

    vlms:
      urls:
        qwen_vl: http://vl-svc
        vlm: http://vl-svc:8080

    path:
      chroma_rerank_embedding_model: /Model/BAAI
      cosyvoice_path: /Voice/CosyVoice
      cosyvoice_model_path: /Voice/CosyVoice/pretrained_models
      sensevoice_model_path: /Voice/SenseVoice/SenseVoiceSmall
96 jarvis/jarvis/templates/jarvis-models.yaml Normal file
@@ -0,0 +1,96 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: jarvis-model-deployment
  # namespace: jarvis-models
  labels:
    app: jarvis-model
spec:
  replicas: 1
  selector:
    matchLabels:
      app: jarvis-model
  template:
    metadata:
      labels:
        app: jarvis-model
    spec:
      #hostNetwork: true
      # --- START: image pull secrets ---
      imagePullSecrets:
        - name: regcred # This MUST match the name of the registry secret created beforehand
      # --- END: image pull secrets ---
      runtimeClassName: nvidia
      containers:
        - name: jarvis-model
          image: {{ .Values.jarvis_model.image }}
          imagePullPolicy: IfNotPresent # Pull only if the image is not already present on the node
          # command: ["sleep", "infinity"]
          ports:
            - containerPort: 8000 # The port the application listens on inside the container
              #hostPort: 8880 # The port on the host machine (--network=host combined with -p 8880:8080)
              protocol: TCP
          resources:
            requests:
              cpu: 1 # 1 CPU
              memory: 512Mi # 512 mebibytes
            limits:
              cpu: 2 # limit to 2 CPUs
              memory: 1Gi # limit to 1 gibibyte
              nvidia.com/gpu: 1
          volumeMounts:
            - name: env-config-volume
              mountPath: /jarvis-models/.env.yaml
              subPath: .env.yaml
              readOnly: true
            - name: nfs-volume
              subPath: Weight
              mountPath: /Model
            - name: nfs-volume
              subPath: Voice
              mountPath: /Voice
      volumes:
        - name: env-config-volume
          #hostPath:
          #  path: /Workspace/HDD1/code/jarvis-api/.env.yml # Host path for .env.yml
          #  type: FileOrCreate
          configMap:
            name: {{ .Release.Name }}-cm # This MUST match the name of the ConfigMap created by this chart
            items:
              - key: .models.env.yml # This is the key defined in the ConfigMap's data section
                path: .env.yaml # This is the filename inside the mountPath (e.g., /.env.yml)
        - name: nfs-volume
          nfs:
            server: "10.6.80.11"
            path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/"
        #{{- range .Values.volumes }}
        #- {{ . | toYaml | nindent 10 | trim }}
        #{{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
---
apiVersion: v1
kind: Service
metadata:
  name: jarvis-model-service
  # namespace: jarvis-models
  labels:
    app: jarvis-model
spec:
  selector:
    app: jarvis-model
  ports:
    - protocol: TCP
      port: 8080
      targetPort: 8000
  type: NodePort
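This Deployment requests one nvidia.com/gpu and sets runtimeClassName: nvidia, so it only schedules on nodes where the NVIDIA runtime class and device plugin are installed. A pre-flight check (the node name is a placeholder, not taken from this commit):

  kubectl get runtimeclass nvidia
  kubectl describe node <gpu-node> | grep nvidia.com/gpu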
87 jarvis/jarvis/values.yaml Normal file
@@ -0,0 +1,87 @@
# Default values for jarvis.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
replicaCount: 1


jarvis_api:
  image: harbor.bwgdi.com/library/jarvis-api:1.0.9

jarvis_model:
  image: harbor.bwgdi.com/library/jarvis-models:0.0.1

jarvis_adapter:
  image: harbor.bwgdi.com/library/adapter-test:0.0.1
  endpoint: "http://vllm-leader-nodeport:8080"

resources: {}
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi

# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
livenessProbe:
  httpGet:
    path: /
    port: http
readinessProbe:
  httpGet:
    path: /
    port: http

# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/
service:
  # This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
  type: ClusterIP
  # This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
  port: 80

# This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/
ingress:
  enabled: false
  className: ""
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  hosts:
    - host: chart-example.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local

# Additional volumes on the output Deployment definition.
volumes:
  - name: nfs-volume
    nfs:
      server: "10.6.80.11"
      path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/"

# - name: foo
#   secret:
#     secretName: mysecret
#     optional: false

# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
#   mountPath: "/etc/foo"
#   readOnly: true

nodeSelector: {}

tolerations: []

affinity: {}
43 jarvis/metadata.yaml Normal file
@@ -0,0 +1,43 @@

application_name: &application_name jarvis

distributed:
  method: helm
  release_name: *application_name
  chart: jarvis
  sets:
    jarvis_api:
      image: harbor.bwgdi.com/library/jarvis-api:1.0.9
    jarvis_adapter:
      image: harbor.bwgdi.com/library/adapter-test:0.0.1
      endpoint: "http://vllm-leader-nodeport:8080"
    nodeSelector: {}
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    servicename: jarvis-api-service
    port: 30083
    url: ~
  pod:
    name: jarvis-
monolithic:
  method: helm
  release_name: *application_name
  chart: jarvis
  sets:
    jarvis_api:
      image: harbor.bwgdi.com/library/jarvis-api:1.0.9
    jarvis_adapter:
      image: harbor.bwgdi.com/library/adapter-test:0.0.1
      endpoint: "http://vllm-leader-nodeport:8080"
    nodeSelector: {}
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    servicename: jarvis-api-service
    port: 30083
    url: ~
  pod:
    name: jarvis-
17 jupyter/jupyter/Chart.yaml Normal file
@@ -0,0 +1,17 @@
apiVersion: v1
appVersion: 6.0.3
description: Helm for jupyter single server with pyspark support
home: https://jupyter.org
icon: https://jupyter.org/assets/main-logo.svg
keywords:
  - jupyter
  - notebook
  - spark
maintainers:
  - email: cgiraldo@gradiant.org
    name: cgiraldo
name: jupyter
sources:
  - https://github.com/gradiant/charts
  - https://github.com/astrobounce/helm-jupyter
version: 0.1.6
34 jupyter/jupyter/README.md Normal file
@@ -0,0 +1,34 @@
jupyter
=======
Helm for jupyter single server with pyspark support.
For the jupyterhub chart see [zero-to-jupyterhub](https://zero-to-jupyterhub.readthedocs.io/en/latest/).

Current chart version is `0.1.0`

Source code can be found [here](https://github.com/gradiant/charts/charts/jupyter).


## Chart Values

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| affinity | object | `{}` | |
| image.pullPolicy | string | `"IfNotPresent"` | |
| image.repository | string | `"gradiant/jupyter"` | |
| image.tag | string | `"6.0.1"` | |
| ingress.annotations | object | `{}` | |
| ingress.enabled | bool | `false` | |
| ingress.hosts[0] | string | `"jupyter.127-0-0-1.nip"` | |
| ingress.path | string | `"/"` | |
| ingress.tls | list | `[]` | |
| lab | bool | `true` | |
| nodeSelector | object | `{}` | |
| persistence.accessMode | string | `"ReadWriteOnce"` | |
| persistence.enabled | bool | `true` | |
| persistence.size | string | `"50Gi"` | |
| persistence.storageClass | string | `nil` | |
| resources | object | `{}` | |
| service.externalPort | int | `8888` | |
| service.nodePort.http | string | `nil` | |
| service.type | string | `"ClusterIP"` | |
| tolerations | list | `[]` | |
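A usage sketch based on the values table above; the release name and chart path are illustrative and not taken from the README:

  helm install jupyter ./jupyter \
    --set service.type=NodePort \
    --set persistence.size=50Gi \
    --set lab=true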
23 jupyter/jupyter/templates/NOTES.txt Normal file
@@ -0,0 +1,23 @@
1. Get the access token from the jupyter server log:
   kubectl logs -f -n {{ .Release.Namespace }} svc/{{ include "jupyter.fullname" . }}

2. Create a port-forward to the jupyter service:
   kubectl port-forward -n {{ .Release.Namespace }} svc/{{ include "jupyter.fullname" . }} 8888:{{ .Values.service.externalPort }}

Then open the UI in your browser and use the access token:
open http://localhost:8888

If you set up your own password, remember to restart the jupyter server to update the configuration:
File -> Shut Down

{{- if .Values.ingress.enabled }}
Ingress is enabled:
{{- range .Values.ingress.tls }}
{{- range .hosts }}
open https://{{ . }}
{{- end }}
{{- end }}
{{- range .Values.ingress.hosts }}
open http://{{ . }}
{{- end }}
{{- end }}
32 jupyter/jupyter/templates/_helpers.tpl Normal file
@@ -0,0 +1,32 @@
{{/* vim: set filetype=mustache: */}}
{{/*
Expand the name of the chart.
*/}}
{{- define "jupyter.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
{{- end -}}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
*/}}
{{- define "jupyter.fullname" -}}
{{- if .Values.fullnameOverride -}}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- $name := default .Chart.Name .Values.nameOverride -}}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}

{{/*
Standard Labels from Helm documentation https://helm.sh/docs/chart_best_practices/#labels-and-annotations
*/}}

{{- define "jupyter.labels" -}}
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
app.kubernetes.io/instance: {{ .Release.Name | quote }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
app.kubernetes.io/part-of: {{ .Chart.Name }}
{{- end -}}
36
jupyter/jupyter/templates/git-notebooks-configmap.yaml
Normal file
@ -0,0 +1,36 @@
{{- if .Values.gitNotebooks }}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "jupyter.fullname" . }}-git-notebooks
  labels:
    app.kubernetes.io/name: {{ include "jupyter.name" . }}
    {{- include "jupyter.labels" . | nindent 4 }}
data:
  git-notebooks.sh: |-
    #!/bin/sh
    set -x
    cd /home/jovyan
    {{- if .Values.gitNotebooks.secretName }}
    cp -r /tmp/.ssh /root/
    chmod 600 /root/.ssh/*
    {{- else }}
    mkdir /root/.ssh
    {{- end }}
    echo "Loading notebooks from git repo"
    {{- range .Values.gitNotebooks.repos }}
    if [ ! -d "/home/jovyan/{{ .name }}" ]
    then
      echo "Cloning {{ .name }} notebook repository"
      {{- if or (hasPrefix "git" .repo) (hasPrefix "ssh" .repo) }}
      ssh-keyscan {{ .repo | regexFind "@([a-zA-Z0-9.]*)" | replace "@" "" }} >> ~/.ssh/known_hosts
      {{- end }}
      git clone {{ .repo }} {{ .name }}
    else
      echo "{{ .name }} notebook repository already cloned"
    fi
    {{- end }}
    # exit code 0 to continue deployment even if git clone fails
    exit 0
{{- end }}
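gitNotebooks is commented out in values.yaml further down; a sketch of rendering just this ConfigMap with one public repo enabled (the repo name and URL are taken from the values.yaml comments, the release name is an assumption):

    helm template jupyter ./jupyter/jupyter \
      --show-only templates/git-notebooks-configmap.yaml \
      --set gitNotebooks.repos[0].name=gradiant \
      --set gitNotebooks.repos[0].repo=https://github.com/Gradiant/notebooks.git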
39
jupyter/jupyter/templates/ingress.yaml
Normal file
@ -0,0 +1,39 @@
{{- if .Values.ingress.enabled -}}
{{- $fullName := include "jupyter.fullname" . -}}
{{- $ingressPath := .Values.ingress.path -}}
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: {{ $fullName }}
  labels:
    app.kubernetes.io/name: {{ include "jupyter.name" . }}
    {{- include "jupyter.labels" . | nindent 4 }}
    {{- if .Values.ingress.labels }}
{{ toYaml .Values.ingress.labels | indent 4 }}
    {{- end }}
  {{- with .Values.ingress.annotations }}
  annotations:
{{ toYaml . | indent 4 }}
  {{- end }}
spec:
  {{- if .Values.ingress.tls }}
  tls:
  {{- range .Values.ingress.tls }}
    - hosts:
      {{- range .hosts }}
        - {{ . }}
      {{- end }}
      secretName: {{ .secretName }}
  {{- end }}
  {{- end }}
  rules:
  {{- range .Values.ingress.hosts }}
    - host: {{ . }}
      http:
        paths:
          - path: {{ $ingressPath }}
            backend:
              serviceName: {{ $fullName }}
              servicePort: web
  {{- end }}
{{- end }}
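This template still renders the extensions/v1beta1 Ingress API, which was removed in Kubernetes 1.22, so enabling the ingress only works against older clusters. A quick check plus an enable sketch (release name and host value assumed; the host matches the chart default):

    kubectl api-versions | grep -E 'extensions/v1beta1|networking.k8s.io'
    helm upgrade --install jupyter ./jupyter/jupyter \
      --set ingress.enabled=true \
      --set ingress.hosts[0]=jupyter.127-0-0-1.nip.io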
20
jupyter/jupyter/templates/service.yaml
Normal file
@ -0,0 +1,20 @@
apiVersion: v1
kind: Service
metadata:
  name: {{ include "jupyter.fullname" . }}
  labels:
    app.kubernetes.io/name: {{ include "jupyter.name" . }}
    {{- include "jupyter.labels" . | nindent 4 }}
spec:
  type: {{ .Values.service.type }}
  selector:
    app.kubernetes.io/name: {{ include "jupyter.name" . }}
    app.kubernetes.io/instance: {{ .Release.Name }}
  ports:
    - name: web
      protocol: TCP
      port: {{ .Values.service.externalPort | default 8888 }}
      {{- if (and (eq .Values.service.type "NodePort") (not (empty .Values.service.nodePort.http))) }}
      nodePort: {{ .Values.service.nodePort.http }}
      {{- end }}
      targetPort: 8888
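With service.type set to NodePort, the assigned node port can be read back like this (release name `jupyter` and namespace are assumptions):

    NODE_PORT=$(kubectl get svc -n jupyter jupyter -o jsonpath='{.spec.ports[0].nodePort}')
    echo "http://<node-ip>:$NODE_PORT"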
118
jupyter/jupyter/templates/statefulset.yaml
Normal file
118
jupyter/jupyter/templates/statefulset.yaml
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
apiVersion: apps/v1
|
||||||
|
kind: StatefulSet
|
||||||
|
metadata:
|
||||||
|
name: {{ include "jupyter.fullname" . }}
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: {{ include "jupyter.name" . }}
|
||||||
|
{{- include "jupyter.labels" . | nindent 4 }}
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: {{ include "jupyter.name" . }}
|
||||||
|
app.kubernetes.io/instance: {{ .Release.Name | quote }}
|
||||||
|
serviceName: {{ include "jupyter.fullname" . }}
|
||||||
|
replicas: 1
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: {{ include "jupyter.name" . }}
|
||||||
|
{{- include "jupyter.labels" . | nindent 8}}
|
||||||
|
spec:
|
||||||
|
{{- if .Values.gitNotebooks }}
|
||||||
|
initContainers:
|
||||||
|
- name: git-notebooks
|
||||||
|
image: alpine/git
|
||||||
|
command:
|
||||||
|
- /bin/bash
|
||||||
|
- /git-notebooks.sh
|
||||||
|
volumeMounts:
|
||||||
|
- name: git-notebooks
|
||||||
|
mountPath: /git-notebooks.sh
|
||||||
|
subPath: git-notebooks.sh
|
||||||
|
- name: jupyter
|
||||||
|
mountPath: /home/jovyan
|
||||||
|
{{- if .Values.gitNotebooks.secretName }}
|
||||||
|
- name: git-secret
|
||||||
|
mountPath: "/tmp/.ssh"
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
containers:
|
||||||
|
- name: jupyter
|
||||||
|
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
|
||||||
|
imagePullPolicy: {{ .Values.image.pullPolicy | quote }}
|
||||||
|
env:
|
||||||
|
- name: JUPYTER_ENABLE_LAB
|
||||||
|
value: "{{ .Values.lab }}"
|
||||||
|
- name: JPY_USER
|
||||||
|
value: "jovyan"
|
||||||
|
args:
|
||||||
|
- start-notebook.sh
|
||||||
|
- --ip=0.0.0.0
|
||||||
|
- --user="jovyan"
|
||||||
|
ports:
|
||||||
|
- name: web
|
||||||
|
containerPort: 8888
|
||||||
|
protocol: TCP
|
||||||
|
resources:
|
||||||
|
{{ toYaml .Values.resources | indent 10 }}
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /
|
||||||
|
port: 8888
|
||||||
|
initialDelaySeconds: 60
|
||||||
|
timeoutSeconds: 15
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /
|
||||||
|
port: 8888
|
||||||
|
initialDelaySeconds: 60
|
||||||
|
timeoutSeconds: 15
|
||||||
|
volumeMounts:
|
||||||
|
- name: jupyter
|
||||||
|
mountPath: /home/jovyan
|
||||||
|
volumes:
|
||||||
|
{{- if .Values.gitNotebooks }}
|
||||||
|
- name: git-notebooks
|
||||||
|
configMap:
|
||||||
|
name: {{ include "jupyter.fullname" . }}-git-notebooks
|
||||||
|
{{- if .Values.gitNotebooks.secretName }}
|
||||||
|
- name: git-secret
|
||||||
|
secret:
|
||||||
|
secretName: {{ .Values.gitNotebooks.secretName }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
{{- if not .Values.persistence.enabled }}
|
||||||
|
- name: jupyter
|
||||||
|
emptyDir: {}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.nodeSelector }}
|
||||||
|
nodeSelector:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.affinity }}
|
||||||
|
affinity:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.tolerations }}
|
||||||
|
tolerations:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- if .Values.persistence.enabled }}
|
||||||
|
volumeClaimTemplates:
|
||||||
|
- metadata:
|
||||||
|
name: jupyter
|
||||||
|
spec:
|
||||||
|
accessModes: [ "ReadWriteOnce" ]
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: "{{ .Values.persistence.size }}"
|
||||||
|
{{- if .Values.persistence.storageClass }}
|
||||||
|
{{- if (eq "-" .Values.persistence.storageClass) }}
|
||||||
|
storageClassName: ""
|
||||||
|
{{- else }}
|
||||||
|
storageClassName: "{{ .Values.persistence.storageClass }}"
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
|
||||||
64
jupyter/jupyter/values.yaml
Normal file
@ -0,0 +1,64 @@
# Default values for jupyter.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

image:
  repository: gradiant/jupyter
  tag: 6.0.3
  pullPolicy: IfNotPresent

lab: true

#gitNotebooks:
#  secretName: the name of the secret with ssh keys
#  repos:
#  - name: gradiant
#    repo: https://github.com/Gradiant/notebooks.git
#  - name: grad-git
#    repo: git@github.com:Gradiant/notebooks.git

service:
  type: ClusterIP
  externalPort: 8888
  nodePort:
    http:

persistence:
  enabled: true
  storageClass:
  accessMode: ReadWriteOnce
  size: 50Gi

## Ingress configuration
## Ref: https://kubernetes.io/docs/concepts/services-networking/ingress/
##
ingress:
  enabled: false
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  path: /
  hosts:
    - jupyter.127-0-0-1.nip.io
  tls: []
  #  - secretName: jupyter-tls
  #    hosts:
  #      - jupyter.local

resources: {}
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi

nodeSelector: {}

tolerations: []

affinity: {}
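The statefulset template above treats persistence.storageClass set to "-" as an explicit empty storageClassName (no dynamic provisioning), while leaving it unset falls back to the cluster default class; a sketch of forcing the empty class (release name assumed):

    helm upgrade --install jupyter ./jupyter/jupyter \
      --set persistence.storageClass=- \
      --set persistence.size=100Gi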
55
jupyter/metadata.yaml
Normal file
@ -0,0 +1,55 @@

application_name: &application_name jupyter

distributed:
  method: helm
  release_name: *application_name
  chart: jupyter
  sets:
    image:
      repository: gradiant/jupyter
      tag: 6.0.3
      pullPolicy: IfNotPresent
    resources:
      limits:
        nvidia.com/gpu: 0
    nodeSelector:
      resource-group: gpu_5880
    service:
      type: NodePort
      nodePort:
        http: 30888
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    servicename: ~
    port: 30888
    url: ~
  pod:
    name: jupyter-
monolithic:
  method: helm
  release_name: *application_name
  chart: jupyter
  sets:
    image:
      repository: gradiant/jupyter
      tag: 6.0.3
      pullPolicy: IfNotPresent
    resources:
      limits:
        nvidia.com/gpu: 0
    nodeSelector:
      resource-group: gpu_5880
    service:
      type: NodePort
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    servicename: ~
    port: 30888
    url: ~
  pod:
    name: jupyter-
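The tooling that consumes these metadata files is not part of this backup. Under the assumption that the `sets:` block is flattened into helm --set flags, the distributed profile would correspond roughly to the following (note the backslash-escaped dot that helm requires in the nvidia.com/gpu key):

    helm upgrade --install jupyter jupyter \
      --set image.repository=gradiant/jupyter \
      --set image.tag=6.0.3 \
      --set service.type=NodePort \
      --set service.nodePort.http=30888 \
      --set 'resources.limits.nvidia\.com/gpu=0' \
      --set nodeSelector.resource-group=gpu_5880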
23
llama-factory/llama-factory/.helmignore
Normal file
23
llama-factory/llama-factory/.helmignore
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*.orig
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
|
||||||
25
llama-factory/llama-factory/Chart.yaml
Normal file
25
llama-factory/llama-factory/Chart.yaml
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
apiVersion: v2
|
||||||
|
name: Llama-factory
|
||||||
|
description: A Helm chart for deploying vLLM with NFS storage
|
||||||
|
annotations:
|
||||||
|
"helm.sh/resource-policy": keep # 防止资源被意外删除
|
||||||
|
# A chart can be either an 'application' or a 'library' chart.
|
||||||
|
#
|
||||||
|
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||||
|
# to be deployed.
|
||||||
|
#
|
||||||
|
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||||
|
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||||
|
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||||
|
type: application
|
||||||
|
|
||||||
|
# This is the chart version. This version number should be incremented each time you make changes
|
||||||
|
# to the chart and its templates, including the app version.
|
||||||
|
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||||
|
version: 0.1.0
|
||||||
|
|
||||||
|
# This is the version number of the application being deployed. This version number should be
|
||||||
|
# incremented each time you make changes to the application. Versions are not expected to
|
||||||
|
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||||
|
# It is recommended to use it with quotes.
|
||||||
|
appVersion: "1.16.0"
|
||||||
159
llama-factory/llama-factory/templates/llama.yaml
Normal file
159
llama-factory/llama-factory/templates/llama.yaml
Normal file
@ -0,0 +1,159 @@
|
|||||||
|
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "llama-factory") }}
|
||||||
|
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||||
|
kind: LeaderWorkerSet
|
||||||
|
metadata:
|
||||||
|
name: llamafactory
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
leaderWorkerTemplate:
|
||||||
|
size: {{ .Values.workerSize }}
|
||||||
|
restartPolicy: RecreateGroupOnPodRestart
|
||||||
|
leaderTemplate:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
role: leader
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# 模型下载作为第一个 initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# 检查模型是否存在,不存在则下载
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
ls $DEST_DIR
|
||||||
|
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: llama-leader
|
||||||
|
image: {{ .Values.llama.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
env:
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
- name : USE_RAY
|
||||||
|
value: "1"
|
||||||
|
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||||
|
# value: "ray"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "llamafactory-cli webui"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 7860
|
||||||
|
name: http
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
workerTemplate:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: llama-worker
|
||||||
|
image: {{ .Values.llama.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "echo $(LWS_LEADER_ADDRESS);
|
||||||
|
bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
env:
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||||
|
# value: "ray"
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
{{- end }}
|
||||||
14
llama-factory/llama-factory/templates/nfs-pv.yaml
Normal file
14
llama-factory/llama-factory/templates/nfs-pv.yaml
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolume
|
||||||
|
metadata:
|
||||||
|
name: {{ .Values.app }}-pv-model
|
||||||
|
spec:
|
||||||
|
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
|
||||||
|
capacity:
|
||||||
|
storage: {{ .Values.nfs.pvSize }}
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
persistentVolumeReclaimPolicy: Retain
|
||||||
|
nfs:
|
||||||
|
path: {{ .Values.nfs.path }}
|
||||||
|
server: {{ .Values.nfs.server }}
|
||||||
12
llama-factory/llama-factory/templates/nfs-pvc.yaml
Normal file
12
llama-factory/llama-factory/templates/nfs-pvc.yaml
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: {{ .Values.app }}-pvc-model
|
||||||
|
annotations:
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: {{ .Values.nfs.pvcSize }}
|
||||||
|
volumeName: {{ .Values.app }}-pv-model
|
||||||
33
llama-factory/llama-factory/templates/services.yaml
Normal file
@ -0,0 +1,33 @@
#apiVersion: v1
#kind: Service
#metadata:
#  name: infer-leader-loadbalancer
#spec:
#  type: LoadBalancer
#  selector:
#    leaderworkerset.sigs.k8s.io/name: infer
#    role: leader
#  ports:
#    - protocol: TCP
#      port: 8080
#      targetPort: 8080
#
---
apiVersion: v1
kind: Service
metadata:
  name: {{ .Values.app }}-leader-nodeport
spec:
  type: NodePort
  {{- if gt (int .Values.workerSize) 1 }}
  selector:
    leaderworkerset.sigs.k8s.io/name: llamafactory
    role: leader
  {{- else }}
  selector:
    app: llama-factory
  {{- end }}
  ports:
    - protocol: TCP
      port: 8080
      targetPort: 7860
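This service forwards port 8080 to the webui's containerPort 7860. Once a node port has been assigned, the UI can be reached along these lines; the node IP comes from the metadata below, the namespace and the rendered service name (app defaults to llama-factory) are assumptions:

    NODE_PORT=$(kubectl get svc llama-factory-leader-nodeport -o jsonpath='{.spec.ports[0].nodePort}')
    curl -I "http://10.6.14.123:$NODE_PORT"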
51
llama-factory/llama-factory/templates/single.yaml
Normal file
51
llama-factory/llama-factory/templates/single.yaml
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
{{- if eq (int .Values.workerSize) 1 }}
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: llama-factory
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: llama-factory
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: llama-factory
|
||||||
|
image: {{ .Values.llama.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "llamafactory-cli webui"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 7860
|
||||||
|
name: http
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app}}-pvc-model
|
||||||
|
{{- end }}
|
||||||
44
llama-factory/llama-factory/values.yaml
Normal file
@ -0,0 +1,44 @@
# Default values for llama-factory.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Model configuration
model:
  huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"  # the only value the user needs to set
  localMountPath: "/Model"                       # fixed PVC mount path
  huggingfaceToken: "<your-hf-token>"
  download:
    enabled: false                               # enable automatic download
    image: "docker.io/vllm/vllm-openai:latest"   # image that ships huggingface-cli

# Application selection
app: "llama-factory"

resources:
  gpuLimit: 1
  cpuRequest: 8
  memoryLimit: "16Gi"
  shmSize: "20Gi"


llama:
  image: "docker.io/library/one-click:v1"


# NFS PV/PVC configuration
nfs:
  server: "10.6.80.11"
  path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
  storageClass: "local-path"
  pvSize: "500Gi"
  pvcSize: "50Gi"

# LeaderWorkerSet configuration
replicaCount: 1
workerSize: 2

nodeSelector: {}

tolerations: []

affinity: {}
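workerSize is the switch between the two deployment shapes in this chart: single.yaml only renders when it equals 1 (a plain Deployment) and llama.yaml only when it is greater than 1 (a LeaderWorkerSet). A sketch of both installs, with the release name assumed:

    # single node: plain Deployment behind the NodePort service
    helm upgrade --install llama-factory ./llama-factory/llama-factory \
      --set workerSize=1 \
      --set resources.gpuLimit=1

    # multi node: LeaderWorkerSet with one leader and one worker
    helm upgrade --install llama-factory ./llama-factory/llama-factory \
      --set workerSize=2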
53
llama-factory/metadata.yaml
Normal file
53
llama-factory/metadata.yaml
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
|
||||||
|
application_name: &application_name llama-factory
|
||||||
|
|
||||||
|
distributed:
|
||||||
|
method: helm
|
||||||
|
release_name: *application_name
|
||||||
|
chart: llama-factory
|
||||||
|
sets:
|
||||||
|
app: llama-factory
|
||||||
|
model:
|
||||||
|
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
|
||||||
|
resources:
|
||||||
|
gpuLimit: 1
|
||||||
|
cpuRequest: 8
|
||||||
|
memoryLimit: "16Gi"
|
||||||
|
shmSize: "15Gi"
|
||||||
|
llama:
|
||||||
|
image: "docker.io/library/one-click:v1"
|
||||||
|
workerSize: 2
|
||||||
|
nodeSelector: {}
|
||||||
|
svc:
|
||||||
|
svc_type: NodePort
|
||||||
|
protocol: http
|
||||||
|
hostname: 10.6.14.123
|
||||||
|
port: 30080
|
||||||
|
url: ~
|
||||||
|
pod:
|
||||||
|
name: llamafactory
|
||||||
|
monolithic:
|
||||||
|
method: helm
|
||||||
|
release_name: *application_name
|
||||||
|
chart: llama-factory
|
||||||
|
sets:
|
||||||
|
app: llama-factory
|
||||||
|
model:
|
||||||
|
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
|
||||||
|
resources:
|
||||||
|
gpuLimit: 1
|
||||||
|
cpuRequest: 8
|
||||||
|
memoryLimit: "16Gi"
|
||||||
|
shmSize: "15Gi"
|
||||||
|
llama:
|
||||||
|
image: "docker.io/library/one-click:v1"
|
||||||
|
workerSize: 1
|
||||||
|
nodeSelector: {}
|
||||||
|
svc:
|
||||||
|
svc_type: NodePort
|
||||||
|
protocol: http
|
||||||
|
hostname: 10.6.14.123
|
||||||
|
port: 30080
|
||||||
|
url: ~
|
||||||
|
pod:
|
||||||
|
name: llama-factory
|
||||||
23
melotts/melotts/.helmignore
Normal file
23
melotts/melotts/.helmignore
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*.orig
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
|
||||||
24
melotts/melotts/Chart.yaml
Normal file
24
melotts/melotts/Chart.yaml
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
apiVersion: v2
|
||||||
|
name: jarvis
|
||||||
|
description: A Helm chart for Kubernetes
|
||||||
|
|
||||||
|
# A chart can be either an 'application' or a 'library' chart.
|
||||||
|
#
|
||||||
|
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||||
|
# to be deployed.
|
||||||
|
#
|
||||||
|
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||||
|
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||||
|
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||||
|
type: application
|
||||||
|
|
||||||
|
# This is the chart version. This version number should be incremented each time you make changes
|
||||||
|
# to the chart and its templates, including the app version.
|
||||||
|
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||||
|
version: 0.1.0
|
||||||
|
|
||||||
|
# This is the version number of the application being deployed. This version number should be
|
||||||
|
# incremented each time you make changes to the application. Versions are not expected to
|
||||||
|
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||||
|
# It is recommended to use it with quotes.
|
||||||
|
appVersion: "1.16.0"
|
||||||
22
melotts/melotts/templates/NOTES.txt
Normal file
22
melotts/melotts/templates/NOTES.txt
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
1. Get the application URL by running these commands:
|
||||||
|
{{- if .Values.ingress.enabled }}
|
||||||
|
{{- range $host := .Values.ingress.hosts }}
|
||||||
|
{{- range .paths }}
|
||||||
|
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
{{- else if contains "NodePort" .Values.service.type }}
|
||||||
|
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "jarvis.fullname" . }})
|
||||||
|
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
|
||||||
|
echo http://$NODE_IP:$NODE_PORT
|
||||||
|
{{- else if contains "LoadBalancer" .Values.service.type }}
|
||||||
|
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
|
||||||
|
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "jarvis.fullname" . }}'
|
||||||
|
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "jarvis.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
|
||||||
|
echo http://$SERVICE_IP:{{ .Values.service.port }}
|
||||||
|
{{- else if contains "ClusterIP" .Values.service.type }}
|
||||||
|
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "jarvis.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
|
||||||
|
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
||||||
|
echo "Visit http://127.0.0.1:8080 to use your application"
|
||||||
|
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
|
||||||
|
{{- end }}
|
||||||
62
melotts/melotts/templates/_helpers.tpl
Normal file
62
melotts/melotts/templates/_helpers.tpl
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
{{/*
|
||||||
|
Expand the name of the chart.
|
||||||
|
*/}}
|
||||||
|
{{- define "jarvis.name" -}}
|
||||||
|
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Create a default fully qualified app name.
|
||||||
|
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||||
|
If release name contains chart name it will be used as a full name.
|
||||||
|
*/}}
|
||||||
|
{{- define "jarvis.fullname" -}}
|
||||||
|
{{- if .Values.fullnameOverride }}
|
||||||
|
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- else }}
|
||||||
|
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||||
|
{{- if contains $name .Release.Name }}
|
||||||
|
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- else }}
|
||||||
|
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Create chart name and version as used by the chart label.
|
||||||
|
*/}}
|
||||||
|
{{- define "jarvis.chart" -}}
|
||||||
|
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Common labels
|
||||||
|
*/}}
|
||||||
|
{{- define "jarvis.labels" -}}
|
||||||
|
helm.sh/chart: {{ include "jarvis.chart" . }}
|
||||||
|
{{ include "jarvis.selectorLabels" . }}
|
||||||
|
{{- if .Chart.AppVersion }}
|
||||||
|
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||||
|
{{- end }}
|
||||||
|
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Selector labels
|
||||||
|
*/}}
|
||||||
|
{{- define "jarvis.selectorLabels" -}}
|
||||||
|
app.kubernetes.io/name: {{ include "jarvis.name" . }}
|
||||||
|
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Create the name of the service account to use
|
||||||
|
*/}}
|
||||||
|
{{- define "jarvis.serviceAccountName" -}}
|
||||||
|
{{- if .Values.serviceAccount.create }}
|
||||||
|
{{- default (include "jarvis.fullname" .) .Values.serviceAccount.name }}
|
||||||
|
{{- else }}
|
||||||
|
{{- default "default" .Values.serviceAccount.name }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
81
melotts/melotts/templates/melotts.yaml
Normal file
81
melotts/melotts/templates/melotts.yaml
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}-dp
|
||||||
|
# namespace: jarvis-models
|
||||||
|
labels:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
spec:
|
||||||
|
#hostNetwork: true
|
||||||
|
# --- START: Add this section for image pull secrets ---
|
||||||
|
runtimeClassName: nvidia
|
||||||
|
imagePullSecrets:
|
||||||
|
- name: regcred # This MUST match the name of the secret you just created
|
||||||
|
# --- END: Add this section ---
|
||||||
|
containers:
|
||||||
|
- name: melo
|
||||||
|
image: {{ .Values.melotts.image }}
|
||||||
|
imagePullPolicy: IfNotPresent # Ensure it always tries to pull, especially on first deploy
|
||||||
|
#command:
|
||||||
|
# - /bin/bash
|
||||||
|
# - -c
|
||||||
|
# - "bash && sleep infinity"
|
||||||
|
ports:
|
||||||
|
- containerPort: 5000 # The port your application listens on inside the container
|
||||||
|
#hostPort: 8880 # The port on the host machine (--network=host combined with -p 8880:8080)
|
||||||
|
protocol: TCP
|
||||||
|
resources: # Add this section
|
||||||
|
requests:
|
||||||
|
cpu: 2 # Example: 100 millicores (0.1 CPU)
|
||||||
|
memory: 4Gi # Example: 128 mebibytes
|
||||||
|
limits:
|
||||||
|
cpu: 2 # Example: Limit to 500 millicores (0.5 CPU)
|
||||||
|
memory: 6Gi # Example: Limit to 512 mebibytes
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: /models
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: /usr/local/nltk_data
|
||||||
|
subPath: nltk_data
|
||||||
|
volumes:
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
{{- with .Values.nodeSelector }}
|
||||||
|
nodeSelector:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.affinity }}
|
||||||
|
affinity:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.tolerations }}
|
||||||
|
tolerations:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}-service
|
||||||
|
# namespace: jarvis-models
|
||||||
|
labels:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
ports:
|
||||||
|
- protocol: TCP
|
||||||
|
port: 8080
|
||||||
|
targetPort: 5000
|
||||||
|
type: NodePort
|
||||||
14
melotts/melotts/templates/nfs-pv.yaml
Normal file
14
melotts/melotts/templates/nfs-pv.yaml
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolume
|
||||||
|
metadata:
|
||||||
|
name: {{ .Values.app }}-pv-model
|
||||||
|
spec:
|
||||||
|
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
|
||||||
|
capacity:
|
||||||
|
storage: {{ .Values.nfs.pvSize }}
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
persistentVolumeReclaimPolicy: Retain
|
||||||
|
nfs:
|
||||||
|
path: {{ .Values.nfs.path }}
|
||||||
|
server: {{ .Values.nfs.server }}
|
||||||
12
melotts/melotts/templates/nfs-pvc.yaml
Normal file
12
melotts/melotts/templates/nfs-pvc.yaml
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: {{ .Values.app }}-pvc-model
|
||||||
|
annotations:
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: {{ .Values.nfs.pvcSize }}
|
||||||
|
volumeName: {{ .Values.app }}-pv-model
|
||||||
89
melotts/melotts/values.yaml
Normal file
89
melotts/melotts/values.yaml
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
# Default values for jarvis.
|
||||||
|
# This is a YAML-formatted file.
|
||||||
|
# Declare variables to be passed into your templates.
|
||||||
|
|
||||||
|
# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
|
||||||
|
replicaCount: 1
|
||||||
|
|
||||||
|
app: "melotts"
|
||||||
|
|
||||||
|
melotts:
|
||||||
|
image: harbor.bwgdi.com/library/melotts:0.0.2
|
||||||
|
|
||||||
|
jarvis_adapter:
|
||||||
|
image: harbor.bwgdi.com/library/adapter-test:0.0.1
|
||||||
|
endpoint: "http://vllm-leader-nodeport:8080"
|
||||||
|
|
||||||
|
nfs:
|
||||||
|
server: "10.6.80.11"
|
||||||
|
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Voice/MeloTTS"
|
||||||
|
storageClass: "local-path"
|
||||||
|
pvSize: "500Gi"
|
||||||
|
pvcSize: "50Gi"
|
||||||
|
|
||||||
|
resources:
|
||||||
|
gpuLimit: 1
|
||||||
|
# We usually recommend not to specify default resources and to leave this as a conscious
|
||||||
|
# choice for the user. This also increases chances charts run on environments with little
|
||||||
|
# resources, such as Minikube. If you do want to specify resources, uncomment the following
|
||||||
|
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
|
||||||
|
# limits:
|
||||||
|
# cpu: 100m
|
||||||
|
# memory: 128Mi
|
||||||
|
# requests:
|
||||||
|
# cpu: 100m
|
||||||
|
# memory: 128Mi
|
||||||
|
|
||||||
|
# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /
|
||||||
|
port: http
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /
|
||||||
|
port: http
|
||||||
|
|
||||||
|
# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/
|
||||||
|
service:
|
||||||
|
# This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
|
||||||
|
type: ClusterIP
|
||||||
|
# This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
|
||||||
|
port: 80
|
||||||
|
|
||||||
|
# This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/
|
||||||
|
ingress:
|
||||||
|
enabled: false
|
||||||
|
className: ""
|
||||||
|
annotations: {}
|
||||||
|
# kubernetes.io/ingress.class: nginx
|
||||||
|
# kubernetes.io/tls-acme: "true"
|
||||||
|
hosts:
|
||||||
|
- host: chart-example.local
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: ImplementationSpecific
|
||||||
|
tls: []
|
||||||
|
# - secretName: chart-example-tls
|
||||||
|
# hosts:
|
||||||
|
# - chart-example.local
|
||||||
|
|
||||||
|
# Additional volumes on the output Deployment definition.
|
||||||
|
volumes: []
|
||||||
|
# - name: foo
|
||||||
|
# secret:
|
||||||
|
# secretName: mysecret
|
||||||
|
# optional: false
|
||||||
|
|
||||||
|
# Additional volumeMounts on the output Deployment definition.
|
||||||
|
volumeMounts: []
|
||||||
|
# - name: foo
|
||||||
|
# mountPath: "/etc/foo"
|
||||||
|
# readOnly: true
|
||||||
|
|
||||||
|
nodeSelector:
|
||||||
|
resource-group: gpu_5880
|
||||||
|
|
||||||
|
tolerations: []
|
||||||
|
|
||||||
|
affinity: {}
|
||||||
35
melotts/metadata.yaml
Normal file
35
melotts/metadata.yaml
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
|
||||||
|
application_name: &application_name melotts
|
||||||
|
|
||||||
|
distributed:
|
||||||
|
method: helm
|
||||||
|
release_name: *application_name
|
||||||
|
chart: melotts
|
||||||
|
sets:
|
||||||
|
jarvis_api:
|
||||||
|
image: harbor.bwgdi.com/library/melotts:0.0.2
|
||||||
|
svc:
|
||||||
|
svc_type: NodePort
|
||||||
|
protocol: http
|
||||||
|
hostname: 10.6.14.123
|
||||||
|
servicename: melo-service
|
||||||
|
port: 32147
|
||||||
|
url: ~
|
||||||
|
pod:
|
||||||
|
name: *application_name
|
||||||
|
monolithic:
|
||||||
|
method: helm
|
||||||
|
release_name: *application_name
|
||||||
|
chart: melotts
|
||||||
|
sets:
|
||||||
|
jarvis_api:
|
||||||
|
image: harbor.bwgdi.com/library/melotts:0.0.2
|
||||||
|
svc:
|
||||||
|
svc_type: NodePort
|
||||||
|
protocol: http
|
||||||
|
hostname: 10.6.14.123
|
||||||
|
servicename: melo-service
|
||||||
|
port: 32147
|
||||||
|
url: ~
|
||||||
|
pod:
|
||||||
|
name: *application_name
|
||||||
53
vllm/metadata.yaml
Normal file
@ -0,0 +1,53 @@

application_name: &application_name vllm

distributed:
  method: helm
  release_name: *application_name
  chart: vllm-app
  sets:
    app: vllm
    model:
      huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
    resources:
      gpuLimit: 1
      cpuRequest: 8
      memoryLimit: "16Gi"
      shmSize: "15Gi"
    workerSize: 2
    nodeSelector: {}
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    port: 30080
    url: ~
    paths:
      docs_path: /docs
      redoc_path: /redoc
  pod:
    name: infer-0
monolithic:
  method: helm
  release_name: *application_name
  chart: vllm-app
  sets:
    app: vllm
    model:
      huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
    resources:
      gpuLimit: 1
      cpuRequest: 8
      memoryLimit: "16Gi"
      shmSize: "15Gi"
    workerSize: 1
    nodeSelector: {}
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    port: 30080
    url: ~
  pod:
    name: vllm
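The docs_path and redoc_path entries above point at the FastAPI documentation pages of an OpenAI-compatible server. Assuming the vLLM leader really is published on node port 30080 at the listed host, a quick smoke test would be:

    curl -s -o /dev/null -w '%{http_code}\n' http://10.6.14.123:30080/docs
    curl -s http://10.6.14.123:30080/v1/models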
23
vllm/vllm-app/.helmignore
Normal file
23
vllm/vllm-app/.helmignore
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*.orig
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
|
||||||
25
vllm/vllm-app/Chart.yaml
Normal file
25
vllm/vllm-app/Chart.yaml
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
apiVersion: v2
|
||||||
|
name: vllm-app
|
||||||
|
description: A Helm chart for deploying vLLM with NFS storage
|
||||||
|
annotations:
|
||||||
|
"helm.sh/resource-policy": keep # 防止资源被意外删除
|
||||||
|
# A chart can be either an 'application' or a 'library' chart.
|
||||||
|
#
|
||||||
|
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||||
|
# to be deployed.
|
||||||
|
#
|
||||||
|
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||||
|
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||||
|
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||||
|
type: application
|
||||||
|
|
||||||
|
# This is the chart version. This version number should be incremented each time you make changes
|
||||||
|
# to the chart and its templates, including the app version.
|
||||||
|
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||||
|
version: 0.1.0
|
||||||
|
|
||||||
|
# This is the version number of the application being deployed. This version number should be
|
||||||
|
# incremented each time you make changes to the application. Versions are not expected to
|
||||||
|
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||||
|
# It is recommended to use it with quotes.
|
||||||
|
appVersion: "1.16.0"
|
||||||
165
vllm/vllm-app/templates/llama.yaml
Normal file
165
vllm/vllm-app/templates/llama.yaml
Normal file
@ -0,0 +1,165 @@
|
|||||||
|
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "llama") }}
|
||||||
|
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||||
|
kind: LeaderWorkerSet
|
||||||
|
metadata:
|
||||||
|
name: infer
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
leaderWorkerTemplate:
|
||||||
|
size: {{ .Values.workerSize }}
|
||||||
|
restartPolicy: RecreateGroupOnPodRestart
|
||||||
|
leaderTemplate:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
role: leader
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# 模型下载作为第一个 initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# 检查模型是否存在,不存在则下载
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
ls $DEST_DIR
|
||||||
|
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: llama-leader
|
||||||
|
image: {{ .Values.llama.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
- name : USE_RAY
|
||||||
|
value: "1"
|
||||||
|
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||||
|
# value: "ray"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||||
|
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_NAME_OR_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||||
|
llamafactory-cli webchat {{ .Values.model.localMountPath }}/lws-config/qwen2_5_3B.yaml "
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 7860
|
||||||
|
name: http
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
workerTemplate:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: llama-worker
|
||||||
|
image: {{ .Values.llama.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "echo $(LWS_LEADER_ADDRESS);
|
||||||
|
bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||||
|
# value: "ray"
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
{{- end }}
|
||||||
170
vllm/vllm-app/templates/lmdeploy_lws.yaml
Normal file
170
vllm/vllm-app/templates/lmdeploy_lws.yaml
Normal file
@ -0,0 +1,170 @@
|
|||||||
|
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "lmdeploy") }}
|
||||||
|
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||||
|
kind: LeaderWorkerSet
|
||||||
|
metadata:
|
||||||
|
name: infer
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
leaderWorkerTemplate:
|
||||||
|
size: {{ .Values.workerSize }}
|
||||||
|
restartPolicy: RecreateGroupOnPodRestart
|
||||||
|
leaderTemplate:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
role: leader
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# 模型下载作为第一个 initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# 检查模型是否存在,不存在则下载
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
ls $DEST_DIR
|
||||||
|
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: lmdeploy-leader
|
||||||
|
image: {{ .Values.lmdeploy.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
- name : LMDEPLOY_EXECUTOR_BACKEND
|
||||||
|
value: "ray"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||||
|
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||||
|
lmdeploy serve api_server $MODEL_PATH --backend pytorch --tp $(({{ .Values.resources.gpuLimit }} * {{ .Values.workerSize }})) --server-port 8080 --cache-max-entry-count 0.9"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
readinessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
#httpGet:
|
||||||
|
#path: /health
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 120
|
||||||
|
periodSeconds: 20
|
||||||
|
timeoutSeconds: 5
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: nfs-pvc-model
|
||||||
|
workerTemplate:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: lmdeploy-worker
|
||||||
|
image: {{ .Values.lmdeploy.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.lmdeploy.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
- name: LMDEPLOY_EXECUTOR_BACKEND
|
||||||
|
value: "ray"
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: nfs-pvc-model
|
||||||
|
{{- end }}
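For reference, a minimal sketch of what the lmdeploy leader command above expands to once Helm substitutes the default values (gpuLimit=1, workerSize=2, localMountPath=/Model, huggingfaceName=Qwen/Qwen2.5-0.5B-Instruct); the concrete values are assumptions taken from values.yaml, not the only supported configuration:

# hypothetical rendered leader command, assuming the defaults listed above
bash /Model/multi-node-serving.sh leader --ray_cluster_size=$LWS_GROUP_SIZE
MODEL_NAME=$(basename 'Qwen/Qwen2.5-0.5B-Instruct')   # -> Qwen2.5-0.5B-Instruct
lmdeploy serve api_server "/Model/$MODEL_NAME" --backend pytorch \
  --tp $((1 * 2)) --server-port 8080 --cache-max-entry-count 0.9   # tp = gpuLimit * workerSize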
166
vllm/vllm-app/templates/lws.yaml
Normal file
@ -0,0 +1,166 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "vllm") }}
|
||||||
|
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||||
|
kind: LeaderWorkerSet
|
||||||
|
metadata:
|
||||||
|
name: infer
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
leaderWorkerTemplate:
|
||||||
|
size: {{ .Values.workerSize }}
|
||||||
|
restartPolicy: RecreateGroupOnPodRestart
|
||||||
|
leaderTemplate:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
role: leader
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# Model download runs as the first initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# Check whether the model already exists; download it if not
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
ls $DEST_DIR
|
||||||
|
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: vllm-leader
|
||||||
|
image: {{ .Values.vllm.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||||
|
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||||
|
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
readinessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
#httpGet:
|
||||||
|
#path: /health
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 120
|
||||||
|
periodSeconds: 20
|
||||||
|
timeoutSeconds: 5
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
workerTemplate:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: vllm-worker
|
||||||
|
image: {{ .Values.vllm.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
{{- end }}
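Likewise, a sketch of the rendered vLLM leader command under the same assumed defaults, showing how tensor parallelism maps to GPUs per pod and pipeline parallelism to the LeaderWorkerSet group size; the model path and sizes are illustrative:

# hypothetical rendered vLLM leader command (gpuLimit=1, workerSize=2 assumed)
bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$LWS_GROUP_SIZE
MODEL_PATH="/Model/$(basename 'Qwen/Qwen2.5-0.5B-Instruct')"
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model "$MODEL_PATH" \
  --tensor-parallel-size 1 --pipeline_parallel_size 2 --trust_remote_code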
44
vllm/vllm-app/templates/model-download-job.yaml
Normal file
@ -0,0 +1,44 @@
{{- if .Values.model.download.enabled }}
|
||||||
|
apiVersion: batch/v1
|
||||||
|
kind: Job
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}-download-model
|
||||||
|
annotations:
|
||||||
|
"helm.sh/hook": pre-install,pre-upgrade # 在安装/升级前执行
|
||||||
|
"helm.sh/hook-weight": "-10" # 优先执行
|
||||||
|
"helm.sh/hook-delete-policy": hook-succeeded
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
restartPolicy: OnFailure
|
||||||
|
containers:
|
||||||
|
- name: downloader
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
if [ -d "$DEST_DIR" ]; then
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: model-storage
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: model-storage
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: nfs-pvc-model # reuse the existing PVC
|
||||||
|
{{- end }}
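The pre-install Job above only renders when model.download.enabled is true; a minimal sketch of switching it on at install time (release name and chart path are placeholders):

# hypothetical install command; adjust the release name and chart path to your layout
helm install vllm ./vllm/vllm-app \
  --set model.download.enabled=true \
  --set model.huggingfaceName=Qwen/Qwen2.5-0.5B-Instruct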
14
vllm/vllm-app/templates/nfs-pv.yaml
Normal file
@ -0,0 +1,14 @@
apiVersion: v1
|
||||||
|
kind: PersistentVolume
|
||||||
|
metadata:
|
||||||
|
name: {{ .Values.app }}-pv-model
|
||||||
|
spec:
|
||||||
|
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
|
||||||
|
capacity:
|
||||||
|
storage: {{ .Values.nfs.pvSize }}
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
persistentVolumeReclaimPolicy: Retain
|
||||||
|
nfs:
|
||||||
|
path: {{ .Values.nfs.path }}
|
||||||
|
server: {{ .Values.nfs.server }}
12
vllm/vllm-app/templates/nfs-pvc.yaml
Normal file
@ -0,0 +1,12 @@
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: {{ .Values.app }}-pvc-model
|
||||||
|
annotations:
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: {{ .Values.nfs.pvcSize }}
|
||||||
|
volumeName: {{ .Values.app }}-pv-model
39
vllm/vllm-app/templates/services.yaml
Normal file
@ -0,0 +1,39 @@
#apiVersion: v1
|
||||||
|
#kind: Service
|
||||||
|
#metadata:
|
||||||
|
# name: infer-leader-loadbalancer
|
||||||
|
#spec:
|
||||||
|
# type: LoadBalancer
|
||||||
|
# selector:
|
||||||
|
# leaderworkerset.sigs.k8s.io/name: infer
|
||||||
|
# role: leader
|
||||||
|
# ports:
|
||||||
|
# - protocol: TCP
|
||||||
|
# port: 8080
|
||||||
|
# targetPort: 8080
|
||||||
|
#
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: {{ .Values.app }}-leader-nodeport
|
||||||
|
spec:
|
||||||
|
type: NodePort
|
||||||
|
{{- if gt (int .Values.workerSize) 1 }}
|
||||||
|
selector:
|
||||||
|
leaderworkerset.sigs.k8s.io/name: infer
|
||||||
|
role: leader
|
||||||
|
{{- else }}
|
||||||
|
selector:
|
||||||
|
app: vllm-app
|
||||||
|
{{- end }}
|
||||||
|
ports:
|
||||||
|
- protocol: TCP
|
||||||
|
port: 8080
|
||||||
|
{{- if eq .Values.app "llama" }}
|
||||||
|
targetPort: 7860
|
||||||
|
{{- else }}
|
||||||
|
targetPort: 8080
|
||||||
|
{{- end }}
|
||||||
|
nodePort: 30080
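Once the leader (or single-node) pod reports ready, the OpenAI-compatible API should be reachable through this NodePort; a small smoke test, assuming a node IP of 10.6.14.123 as used in the metadata files, the default vLLM backend on port 8080, and the default model path (all assumptions):

# hypothetical smoke test against the NodePort service
curl http://10.6.14.123:30080/v1/models
curl http://10.6.14.123:30080/v1/completions \
  -H 'Content-Type: application/json' \
  -d '{"model": "/Model/Qwen2.5-0.5B-Instruct", "prompt": "Hello", "max_tokens": 16}'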
114
vllm/vllm-app/templates/single.yaml
Normal file
@ -0,0 +1,114 @@
{{- if eq (int .Values.workerSize) 1 }}
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: vllm
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: vllm-app
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: vllm-app
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# Model download runs as the first initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# Check whether the model already exists; download it if not
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
ls $DEST_DIR
|
||||||
|
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: vllm-leader
|
||||||
|
image: {{ .Values.vllm.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
#securityContext:
|
||||||
|
# capabilities:
|
||||||
|
# add: [ "IPC_LOCK" ]
|
||||||
|
env:
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
#- name: GLOO_SOCKET_IFNAME
|
||||||
|
# value: eth0
|
||||||
|
#- name: NCCL_SOCKET_IFNAME
|
||||||
|
# value: eth0
|
||||||
|
#- name: NCCL_IB_DISABLE
|
||||||
|
# value: "0"
|
||||||
|
#- name: NCCL_DEBUG
|
||||||
|
# value: INFO
|
||||||
|
#- name: NCCL_IB_HCA
|
||||||
|
# value: mlx5_0:1
|
||||||
|
#- name: NCCL_IB_GID_INDEX
|
||||||
|
# value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||||
|
echo 'Using single node ------------------------------------------';
|
||||||
|
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
#rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
readinessProbe:
|
||||||
|
#tcpSocket:
|
||||||
|
httpGet:
|
||||||
|
path: /health
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 120
|
||||||
|
periodSeconds: 20
|
||||||
|
timeoutSeconds: 5
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
{{- end }}
58
vllm/vllm-app/values.yaml
Normal file
@ -0,0 +1,58 @@
# Default values for vllm-app.
|
||||||
|
# This is a YAML-formatted file.
|
||||||
|
# Declare variables to be passed into your templates.
|
||||||
|
|
||||||
|
# Model configuration
|
||||||
|
model:
|
||||||
|
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # 用户只需输入这个
|
||||||
|
localMountPath: "/Model" # PVC 固定挂载路径
|
||||||
|
huggingfaceToken: "<your-hf-token>"
|
||||||
|
download:
|
||||||
|
enabled: false # enable automatic model download
image: "docker.io/vllm/vllm-openai:latest" # image that ships huggingface-cli
|
||||||
|
|
||||||
|
# Application selection
|
||||||
|
app: "vllm"
|
||||||
|
|
||||||
|
resources:
|
||||||
|
gpuLimit: 1
|
||||||
|
cpuRequest: 12
|
||||||
|
memoryLimit: "16Gi"
|
||||||
|
shmSize: "20Gi"
|
||||||
|
|
||||||
|
# vLLM application configuration
|
||||||
|
vllm:
|
||||||
|
image: "docker.io/vllm/vllm-openai:latest"
|
||||||
|
#gpuLimit: 2
|
||||||
|
# cpuRequest: 12
|
||||||
|
# memoryLimit: "12Gi"
|
||||||
|
# shmSize: "15Gi"
|
||||||
|
|
||||||
|
llama:
|
||||||
|
image: "docker.io/library/one-click:v1"
|
||||||
|
|
||||||
|
# lmdeploy application configuration
|
||||||
|
lmdeploy:
|
||||||
|
image: "docker.io/openmmlab/lmdeploy:latest-cu12"
|
||||||
|
# gpuLimit: 2
|
||||||
|
# cpuRequest: 12
|
||||||
|
# memoryLimit: "12Gi"
|
||||||
|
# shmSize: "15Gi"
|
||||||
|
|
||||||
|
# NFS PV/PVC configuration
|
||||||
|
nfs:
|
||||||
|
server: "10.6.80.11"
|
||||||
|
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
|
||||||
|
storageClass: "local-path"
|
||||||
|
pvSize: "500Gi"
|
||||||
|
pvcSize: "50Gi"
|
||||||
|
|
||||||
|
# LeaderWorkerSet configuration
|
||||||
|
replicaCount: 1
|
||||||
|
workerSize: 2
|
||||||
|
|
||||||
|
nodeSelector: {}
|
||||||
|
|
||||||
|
tolerations: []
|
||||||
|
|
||||||
|
affinity: {}
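These defaults are meant to be overridden per deployment; a minimal sketch of installing the chart with a different model and GPU count (release name, chart path, and the override values are placeholders):

# hypothetical install with overrides; any value above can be replaced via --set
helm install infer ./vllm/vllm-app \
  --set app=vllm \
  --set model.huggingfaceName=Qwen/Qwen2.5-32B-Instruct \
  --set resources.gpuLimit=2 \
  --set workerSize=1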
23
vllm/vllm-serve/.helmignore
Normal file
@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*.orig
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
25
vllm/vllm-serve/Chart.yaml
Normal file
@ -0,0 +1,25 @@
apiVersion: v2
|
||||||
|
name: vllm-serve
|
||||||
|
description: A Helm chart for deploying vLLM with NFS storage
|
||||||
|
annotations:
|
||||||
|
"helm.sh/resource-policy": keep # 防止资源被意外删除
|
||||||
|
# A chart can be either an 'application' or a 'library' chart.
|
||||||
|
#
|
||||||
|
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||||
|
# to be deployed.
|
||||||
|
#
|
||||||
|
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||||
|
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||||
|
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||||
|
type: application
|
||||||
|
|
||||||
|
# This is the chart version. This version number should be incremented each time you make changes
|
||||||
|
# to the chart and its templates, including the app version.
|
||||||
|
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||||
|
version: 0.1.0
|
||||||
|
|
||||||
|
# This is the version number of the application being deployed. This version number should be
|
||||||
|
# incremented each time you make changes to the application. Versions are not expected to
|
||||||
|
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||||
|
# It is recommended to use it with quotes.
|
||||||
|
appVersion: "1.16.0"
16
vllm/vllm-serve/templates/NOTES.txt
Normal file
@ -0,0 +1,16 @@
1. Get the application URL by running these commands:
|
||||||
|
{{- if contains "NodePort" .Values.svc.type }}
|
||||||
|
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ .Release.Name }}-svc)
|
||||||
|
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
|
||||||
|
echo http://$NODE_IP:$NODE_PORT
|
||||||
|
{{- else if contains "LoadBalancer" .Values.service.type }}
|
||||||
|
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
|
||||||
|
You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "vllm-serve.fullname" . }}'
|
||||||
|
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "vllm-serve.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
|
||||||
|
echo http://$SERVICE_IP:{{ .Values.svc.port }}
|
||||||
|
{{- else if contains "ClusterIP" .Values.service.type }}
|
||||||
|
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "vllm-serve.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
|
||||||
|
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
||||||
|
echo "Visit http://127.0.0.1:8080 to use your application"
|
||||||
|
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
|
||||||
|
{{- end }}
62
vllm/vllm-serve/templates/_helpers.tpl
Normal file
@ -0,0 +1,62 @@
{{/*
|
||||||
|
Expand the name of the chart.
|
||||||
|
*/}}
|
||||||
|
{{- define "vllm-serve.name" -}}
|
||||||
|
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Create a default fully qualified app name.
|
||||||
|
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||||
|
If release name contains chart name it will be used as a full name.
|
||||||
|
*/}}
|
||||||
|
{{- define "vllm-serve.fullname" -}}
|
||||||
|
{{- if .Values.fullnameOverride }}
|
||||||
|
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- else }}
|
||||||
|
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||||
|
{{- if contains $name .Release.Name }}
|
||||||
|
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- else }}
|
||||||
|
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Create chart name and version as used by the chart label.
|
||||||
|
*/}}
|
||||||
|
{{- define "vllm-serve.chart" -}}
|
||||||
|
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Common labels
|
||||||
|
*/}}
|
||||||
|
{{- define "vllm-serve.labels" -}}
|
||||||
|
helm.sh/chart: {{ include "vllm-serve.chart" . }}
|
||||||
|
{{ include "vllm-serve.selectorLabels" . }}
|
||||||
|
{{- if .Chart.AppVersion }}
|
||||||
|
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||||
|
{{- end }}
|
||||||
|
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Selector labels
|
||||||
|
*/}}
|
||||||
|
{{- define "vllm-serve.selectorLabels" -}}
|
||||||
|
app.kubernetes.io/name: {{ include "vllm-serve.name" . }}
|
||||||
|
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||||
|
{{- end }}
|
||||||
|
|
||||||
|
{{/*
|
||||||
|
Create the name of the service account to use
|
||||||
|
*/}}
|
||||||
|
{{- define "vllm-serve.serviceAccountName" -}}
|
||||||
|
{{- if .Values.serviceAccount.create }}
|
||||||
|
{{- default (include "vllm-serve.fullname" .) .Values.serviceAccount.name }}
|
||||||
|
{{- else }}
|
||||||
|
{{- default "default" .Values.serviceAccount.name }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
188
vllm/vllm-serve/templates/lws.yaml
Normal file
@ -0,0 +1,188 @@
{{- if gt (int .Values.workerSize) 1 }}
|
||||||
|
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||||
|
kind: LeaderWorkerSet
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
leaderWorkerTemplate:
|
||||||
|
size: {{ .Values.workerSize }}
|
||||||
|
restartPolicy: RecreateGroupOnPodRestart
|
||||||
|
leaderTemplate:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
role: leader
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# Model download runs as the first initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }}
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# Check whether the model already exists; download it if not
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: vllm-leader
|
||||||
|
image: {{ .Values.vllm.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||||
|
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||||
|
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
readinessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
#httpGet:
|
||||||
|
#path: /health
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 120
|
||||||
|
periodSeconds: 20
|
||||||
|
timeoutSeconds: 5
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Release.Name }}-pvc-model
|
||||||
|
{{- with .Values.nodeSelector }}
|
||||||
|
nodeSelector:
|
||||||
|
{{- toYaml . | nindent 10 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.affinity }}
|
||||||
|
affinity:
|
||||||
|
{{- toYaml . | nindent 10 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.tolerations }}
|
||||||
|
tolerations:
|
||||||
|
{{- toYaml . | nindent 10 }}
|
||||||
|
{{- end }}
|
||||||
|
workerTemplate:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: vllm-worker
|
||||||
|
image: {{ .Values.vllm.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Release.Name }}-pvc-model
|
||||||
|
{{- with .Values.nodeSelector }}
|
||||||
|
nodeSelector:
|
||||||
|
{{- toYaml . | nindent 10 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.affinity }}
|
||||||
|
affinity:
|
||||||
|
{{- toYaml . | nindent 10 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.tolerations }}
|
||||||
|
tolerations:
|
||||||
|
{{- toYaml . | nindent 10 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
28
vllm/vllm-serve/templates/nfs-pvc.yaml
Normal file
@ -0,0 +1,28 @@
apiVersion: v1
|
||||||
|
kind: PersistentVolume
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}-pv-model
|
||||||
|
spec:
|
||||||
|
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
|
||||||
|
capacity:
|
||||||
|
storage: {{ .Values.nfs.pvSize }}
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
persistentVolumeReclaimPolicy: Retain
|
||||||
|
nfs:
|
||||||
|
path: {{ .Values.nfs.path }}
|
||||||
|
server: {{ .Values.nfs.server }}
|
||||||
|
---
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}-pvc-model
|
||||||
|
annotations:
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: {{ .Values.nfs.pvcSize }}
|
||||||
|
volumeName: {{ .Release.Name }}-pv-model
35
vllm/vllm-serve/templates/services.yaml
Normal file
@ -0,0 +1,35 @@
#apiVersion: v1
|
||||||
|
#kind: Service
|
||||||
|
#metadata:
|
||||||
|
# name: infer-leader-loadbalancer
|
||||||
|
#spec:
|
||||||
|
# type: LoadBalancer
|
||||||
|
# selector:
|
||||||
|
# leaderworkerset.sigs.k8s.io/name: infer
|
||||||
|
# role: leader
|
||||||
|
# ports:
|
||||||
|
# - protocol: TCP
|
||||||
|
# port: 8080
|
||||||
|
# targetPort: 8080
|
||||||
|
#
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}-svc
|
||||||
|
spec:
|
||||||
|
type: {{ .Values.svc.type | default "NodePort" }}
|
||||||
|
{{- if gt (int .Values.workerSize) 1 }}
|
||||||
|
selector:
|
||||||
|
leaderworkerset.sigs.k8s.io/name: {{ .Release.Name }}
|
||||||
|
role: leader
|
||||||
|
{{- else }}
|
||||||
|
selector:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
{{- end }}
|
||||||
|
ports:
|
||||||
|
- protocol: TCP
|
||||||
|
port: {{ .Values.svc.port | default 8080 }}
|
||||||
|
targetPort: {{ .Values.svc.port | default 8080 }}
|
||||||
|
nodePort: {{ .Values.svc.nodePort | default 30080 }}
108
vllm/vllm-serve/templates/single.yaml
Normal file
@ -0,0 +1,108 @@
{{- if eq (int .Values.workerSize) 1 }}
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# Model download runs as the first initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# Check whether the model already exists; download it if not
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: vllm-pod
|
||||||
|
image: {{ .Values.vllm.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||||
|
echo 'Using single node ------------------------------------------';
|
||||||
|
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
readinessProbe:
|
||||||
|
#tcpSocket:
|
||||||
|
httpGet:
|
||||||
|
path: /health
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 120
|
||||||
|
periodSeconds: 20
|
||||||
|
timeoutSeconds: 5
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Release.Name }}-pvc-model
|
||||||
|
{{- with .Values.nodeSelector }}
|
||||||
|
nodeSelector:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.affinity }}
|
||||||
|
affinity:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.tolerations }}
|
||||||
|
tolerations:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- end }}
75
vllm/vllm-serve/values.yaml
Normal file
@ -0,0 +1,75 @@
# Default values for vllm-app.
|
||||||
|
# This is a YAML-formatted file.
|
||||||
|
# Declare variables to be passed into your templates.
|
||||||
|
|
||||||
|
# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
|
||||||
|
imagePullSecrets: []
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
# This is to override the chart name.
|
||||||
|
nameOverride: ""
|
||||||
|
fullnameOverride: ""
|
||||||
|
|
||||||
|
# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/
|
||||||
|
serviceAccount:
|
||||||
|
# Specifies whether a service account should be created
|
||||||
|
create: true
|
||||||
|
# Automatically mount a ServiceAccount's API credentials?
|
||||||
|
automount: true
|
||||||
|
# Annotations to add to the service account
|
||||||
|
annotations: {}
|
||||||
|
# The name of the service account to use.
|
||||||
|
# If not set and create is true, a name is generated using the fullname template
|
||||||
|
name: ""
|
||||||
|
|
||||||
|
|
||||||
|
# Model configuration
|
||||||
|
model:
|
||||||
|
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # 用户只需输入这个
|
||||||
|
localMountPath: "/Model" # PVC 固定挂载路径
|
||||||
|
huggingfaceToken: "<your-hf-token>"
|
||||||
|
download: # automatic model download
image: "docker.io/vllm/vllm-openai:latest" # image that ships huggingface-cli
|
||||||
|
|
||||||
|
# Application selection
|
||||||
|
|
||||||
|
resources:
|
||||||
|
gpuLimit: 1
|
||||||
|
cpuRequest: 12
|
||||||
|
memoryLimit: "16Gi"
|
||||||
|
shmSize: "20Gi"
|
||||||
|
|
||||||
|
svc:
|
||||||
|
type: NodePort
|
||||||
|
port: 80
|
||||||
|
targetPort: 8080
|
||||||
|
nodePort: 30080
|
||||||
|
# vLLM application configuration
|
||||||
|
vllm:
|
||||||
|
image: "docker.io/vllm/vllm-openai:latest"
|
||||||
|
|
||||||
|
|
||||||
|
llama:
|
||||||
|
image: "docker.io/library/one-click:v1"
|
||||||
|
|
||||||
|
# lmdeploy application configuration
|
||||||
|
lmdeploy:
|
||||||
|
image: "docker.io/openmmlab/lmdeploy:latest-cu12"
|
||||||
|
|
||||||
|
|
||||||
|
# NFS PV/PVC configuration
|
||||||
|
nfs:
|
||||||
|
server: "10.6.80.11"
|
||||||
|
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
|
||||||
|
storageClass: "local-path"
|
||||||
|
pvSize: "500Gi"
|
||||||
|
pvcSize: "50Gi"
|
||||||
|
|
||||||
|
# LeaderWorkerSet configuration
|
||||||
|
replicaCount: 1
|
||||||
|
workerSize: 2
|
||||||
|
|
||||||
|
nodeSelector: {}
|
||||||
|
|
||||||
|
tolerations: []
|
||||||
|
|
||||||
|
affinity: {}
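As with vllm-app, a sketch of installing the vllm-serve chart and remapping the NodePort; values not shown keep the defaults above, and the release name, chart path, and port are placeholders:

# hypothetical install of the vllm-serve chart with a custom NodePort
helm install qwen-serve ./vllm/vllm-serve \
  --set model.huggingfaceName=Qwen/Qwen2.5-0.5B-Instruct \
  --set svc.nodePort=30081 \
  --set workerSize=2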
53
webchat/metadata.yaml
Normal file
@ -0,0 +1,53 @@
|
||||||
|
application_name: &application_name webchat
|
||||||
|
|
||||||
|
distributed:
|
||||||
|
method: helm
|
||||||
|
release_name: *application_name
|
||||||
|
chart: vllm-app
|
||||||
|
sets:
|
||||||
|
app: llama
|
||||||
|
model:
|
||||||
|
huggingfaceName: "Qwen/Qwen2-VL-2B-Instruct"
|
||||||
|
resources:
|
||||||
|
gpuLimit: 1
|
||||||
|
cpuRequest: 8
|
||||||
|
memoryLimit: "8Gi"
|
||||||
|
shmSize: "15Gi"
|
||||||
|
workerSize: 2
|
||||||
|
nodeSelector: {}
|
||||||
|
svc:
|
||||||
|
svc_type: NodePort
|
||||||
|
protocol: http
|
||||||
|
hostname: 10.6.14.123
|
||||||
|
port: 30081
|
||||||
|
url: ~
|
||||||
|
paths:
|
||||||
|
docs_path: /docs
|
||||||
|
redoc_path: /redoc
|
||||||
|
pod:
|
||||||
|
name: *application_name
|
||||||
|
monolithic:
|
||||||
|
method: helm
|
||||||
|
release_name: *application_name
|
||||||
|
chart: vllm-app
|
||||||
|
sets:
|
||||||
|
app: vllm
|
||||||
|
model:
|
||||||
|
huggingfaceName: "Qwen/Qwen2.5-32B-Instruct"
|
||||||
|
resources:
|
||||||
|
gpuLimit: 1
|
||||||
|
cpuRequest: 12
|
||||||
|
memoryLimit: "8Gi"
|
||||||
|
shmSize: "15Gi"
|
||||||
|
workerSize: 1
|
||||||
|
nodeSelector: {}
|
||||||
|
svc:
|
||||||
|
svc_type: NodePort
|
||||||
|
protocol: http
|
||||||
|
hostname: 10.6.14.123
|
||||||
|
port: 30080
|
||||||
|
url: ~
|
||||||
|
pod:
|
||||||
|
name: *application_name
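The sets blocks in this metadata file mirror the chart's values; assuming the deployment tooling translates them into --set flags, the distributed profile would roughly correspond to the following invocation (release name and chart path are assumptions):

# hypothetical expansion of the 'distributed' profile into a Helm install
helm install webchat ./webchat/vllm-app \
  --set app=llama \
  --set model.huggingfaceName=Qwen/Qwen2-VL-2B-Instruct \
  --set resources.gpuLimit=1 --set resources.cpuRequest=8 \
  --set resources.memoryLimit=8Gi --set resources.shmSize=15Gi \
  --set workerSize=2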
23
webchat/vllm-app/.helmignore
Normal file
@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*.orig
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
25
webchat/vllm-app/Chart.yaml
Normal file
@ -0,0 +1,25 @@
apiVersion: v2
|
||||||
|
name: vllm-app
|
||||||
|
description: A Helm chart for deploying vLLM with NFS storage
|
||||||
|
annotations:
|
||||||
|
"helm.sh/resource-policy": keep # 防止资源被意外删除
|
||||||
|
# A chart can be either an 'application' or a 'library' chart.
|
||||||
|
#
|
||||||
|
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||||
|
# to be deployed.
|
||||||
|
#
|
||||||
|
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||||
|
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||||
|
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||||
|
type: application
|
||||||
|
|
||||||
|
# This is the chart version. This version number should be incremented each time you make changes
|
||||||
|
# to the chart and its templates, including the app version.
|
||||||
|
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||||
|
version: 0.1.0
|
||||||
|
|
||||||
|
# This is the version number of the application being deployed. This version number should be
|
||||||
|
# incremented each time you make changes to the application. Versions are not expected to
|
||||||
|
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||||
|
# It is recommended to use it with quotes.
|
||||||
|
appVersion: "1.16.0"
165
webchat/vllm-app/templates/llama.yaml
Normal file
@ -0,0 +1,165 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "llama") }}
|
||||||
|
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||||
|
kind: LeaderWorkerSet
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
leaderWorkerTemplate:
|
||||||
|
size: {{ .Values.workerSize }}
|
||||||
|
restartPolicy: RecreateGroupOnPodRestart
|
||||||
|
leaderTemplate:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
role: leader
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# Model download runs as the first initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# Check whether the model already exists; download it if not
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
ls $DEST_DIR
|
||||||
|
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: llama-leader
|
||||||
|
image: {{ .Values.llama.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
- name: USE_RAY
|
||||||
|
value: "1"
|
||||||
|
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||||
|
# value: "ray"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||||
|
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_NAME_OR_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||||
|
llamafactory-cli webchat {{ .Values.model.localMountPath }}/lws-config/qwen2_5_3B.yaml "
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 7860
|
||||||
|
name: http
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
workerTemplate:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: llama-worker
|
||||||
|
image: {{ .Values.llama.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "echo $(LWS_LEADER_ADDRESS);
|
||||||
|
bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
# - name : LMDEPLOY_EXECUTOR_BACKEND
|
||||||
|
# value: "ray"
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
{{- end }}
170
webchat/vllm-app/templates/lmdeploy_lws.yaml
Normal file
@ -0,0 +1,170 @@
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "lmdeploy") }}
|
||||||
|
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||||
|
kind: LeaderWorkerSet
|
||||||
|
metadata:
|
||||||
|
name: infer
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
leaderWorkerTemplate:
|
||||||
|
size: {{ .Values.workerSize }}
|
||||||
|
restartPolicy: RecreateGroupOnPodRestart
|
||||||
|
leaderTemplate:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
role: leader
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# Model download runs as the first initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# Check whether the model already exists; download it if not
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
ls $DEST_DIR
|
||||||
|
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: lmdeploy-leader
|
||||||
|
image: {{ .Values.lmdeploy.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
- name: LMDEPLOY_EXECUTOR_BACKEND
|
||||||
|
value: "ray"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||||
|
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||||
|
lmdeploy serve api_server $MODEL_PATH --backend pytorch --tp $(({{ .Values.resources.gpuLimit }} * {{ .Values.workerSize }})) --server-port 8080 --cache-max-entry-count 0.9"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
readinessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
#httpGet:
|
||||||
|
#path: /health
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 120
|
||||||
|
periodSeconds: 20
|
||||||
|
timeoutSeconds: 5
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: nfs-pvc-model
|
||||||
|
workerTemplate:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: lmdeploy-worker
|
||||||
|
image: {{ .Values.lmdeploy.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash {{ .Values.model.localMountPath }}/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.lmdeploy.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
- name: LMDEPLOY_EXECUTOR_BACKEND
|
||||||
|
value: "ray"
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: nfs-pvc-model
|
||||||
|
{{- end }}
|
||||||
166
webchat/vllm-app/templates/lws.yaml
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
{{- if and (gt (int .Values.workerSize) 1) (eq .Values.app "vllm") }}
|
||||||
|
apiVersion: leaderworkerset.x-k8s.io/v1
|
||||||
|
kind: LeaderWorkerSet
|
||||||
|
metadata:
|
||||||
|
name: infer
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
leaderWorkerTemplate:
|
||||||
|
size: {{ .Values.workerSize }}
|
||||||
|
restartPolicy: RecreateGroupOnPodRestart
|
||||||
|
leaderTemplate:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
role: leader
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# Model download runs as the first initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# Check whether the model already exists; download it if not
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
ls $DEST_DIR
|
||||||
|
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
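# The config.json check above decides whether a (re)download is needed; hfd.sh is the
# hf-mirror.com downloader and relies on aria2, which is installed just before the call.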
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: vllm-leader
|
||||||
|
image: {{ .Values.vllm.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
|
||||||
|
MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||||
|
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }}"
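# GPU math: tensor-parallel-size (GPUs per pod) * pipeline_parallel_size (pods in the
# group) must equal the GPUs provisioned for the group. As a sketch, with the defaults in
# values.yaml (resources.gpuLimit: 1, workerSize: 2) this renders roughly as:
#   python3 -m vllm.entrypoints.openai.api_server --port 8080 --model /Model/<MODEL_NAME> --tensor-parallel-size 1 --pipeline_parallel_size 2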
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
readinessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
#httpGet:
|
||||||
|
#path: /health
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 120
|
||||||
|
periodSeconds: 20
|
||||||
|
timeoutSeconds: 5
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
workerTemplate:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: vllm-worker
|
||||||
|
image: {{ .Values.vllm.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
securityContext:
|
||||||
|
capabilities:
|
||||||
|
add: [ "IPC_LOCK" ]
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
|
||||||
|
memory: {{ .Values.resources.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.resources.cpuRequest }}
|
||||||
|
env:
|
||||||
|
# - name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
# value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
- name: GLOO_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_SOCKET_IFNAME
|
||||||
|
value: eth0
|
||||||
|
- name: NCCL_IB_DISABLE
|
||||||
|
value: "0"
|
||||||
|
- name: NCCL_DEBUG
|
||||||
|
value: INFO
|
||||||
|
- name: NCCL_IB_HCA
|
||||||
|
value: mlx5_0:1
|
||||||
|
- name: NCCL_IB_GID_INDEX
|
||||||
|
value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.resources.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: {{ .Values.app }}-pvc-model
|
||||||
|
{{- end }}
|
||||||
44
webchat/vllm-app/templates/model-download-job.yaml
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
{{- if .Values.model.download.enabled }}
|
||||||
|
apiVersion: batch/v1
|
||||||
|
kind: Job
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}-download-model
|
||||||
|
annotations:
|
||||||
|
"helm.sh/hook": pre-install,pre-upgrade # 在安装/升级前执行
|
||||||
|
"helm.sh/hook-weight": "-10" # 优先执行
|
||||||
|
"helm.sh/hook-delete-policy": hook-succeeded
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
restartPolicy: OnFailure
|
||||||
|
containers:
|
||||||
|
- name: downloader
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
if [ -d "$DEST_DIR" ]; then
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: model-storage
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: model-storage
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: nfs-pvc-model # reuse the existing PVC
|
||||||
|
{{- end }}
|
||||||
14
webchat/vllm-app/templates/nfs-pv.yaml
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolume
|
||||||
|
metadata:
|
||||||
|
name: {{ .Values.app }}-pv-model
|
||||||
|
spec:
|
||||||
|
storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
|
||||||
|
capacity:
|
||||||
|
storage: {{ .Values.nfs.pvSize }}
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
persistentVolumeReclaimPolicy: Retain
|
||||||
|
nfs:
|
||||||
|
path: {{ .Values.nfs.path }}
|
||||||
|
server: {{ .Values.nfs.server }}
|
||||||
12
webchat/vllm-app/templates/nfs-pvc.yaml
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: {{ .Values.app }}-pvc-model
|
||||||
|
annotations:
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteMany
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: {{ .Values.nfs.pvcSize }}
|
||||||
|
volumeName: {{ .Values.app }}-pv-model
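# volumeName statically binds this claim to the PV declared in nfs-pv.yaml, so the
# requested pvcSize must fit within nfs.pvSize.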
|
||||||
39
webchat/vllm-app/templates/services.yaml
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
#apiVersion: v1
|
||||||
|
#kind: Service
|
||||||
|
#metadata:
|
||||||
|
# name: infer-leader-loadbalancer
|
||||||
|
#spec:
|
||||||
|
# type: LoadBalancer
|
||||||
|
# selector:
|
||||||
|
# leaderworkerset.sigs.k8s.io/name: infer
|
||||||
|
# role: leader
|
||||||
|
# ports:
|
||||||
|
# - protocol: TCP
|
||||||
|
# port: 8080
|
||||||
|
# targetPort: 8080
|
||||||
|
#
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: {{ .Values.app }}-leader-nodeport
|
||||||
|
spec:
|
||||||
|
type: NodePort
|
||||||
|
{{- if gt (int .Values.workerSize) 1 }}
|
||||||
|
selector:
|
||||||
|
leaderworkerset.sigs.k8s.io/name: {{ .Release.Name }}
|
||||||
|
role: leader
|
||||||
|
{{- else }}
|
||||||
|
selector:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
{{- end }}
|
||||||
|
ports:
|
||||||
|
- protocol: TCP
|
||||||
|
port: 8080
|
||||||
|
{{- if eq .Values.app "llama" }}
|
||||||
|
targetPort: 7860
|
||||||
|
{{- else }}
|
||||||
|
targetPort: 8080
|
||||||
|
{{- end }}
|
||||||
|
nodePort: 30081
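# Routing summary: with workerSize > 1 the NodePort targets the LeaderWorkerSet leader
# pod (role=leader); otherwise it targets the single-node Deployment. The llama app
# serves on 7860 internally, everything else on 8080, and the service is reachable on
# every node at port 30081.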
|
||||||
|
|
||||||
114
webchat/vllm-app/templates/single.yaml
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
{{- if eq (int .Values.workerSize) 1 }}
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: {{ .Release.Name }}
|
||||||
|
spec:
|
||||||
|
replicas: {{ .Values.replicaCount }}
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: {{ .Release.Name }}
|
||||||
|
spec:
|
||||||
|
initContainers:
|
||||||
|
# Model download runs as the first initContainer
|
||||||
|
- name: download-model
|
||||||
|
image: {{ .Values.model.download.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
env:
|
||||||
|
- name: HF_ENDPOINT
|
||||||
|
value: https://hf-mirror.com
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.model.huggingfaceToken }}
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
|
||||||
|
DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
|
||||||
|
# DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
|
||||||
|
# Check whether the model already exists; download it if not
|
||||||
|
echo "DEST_DIR= $DEST_DIR"
|
||||||
|
ls $DEST_DIR
|
||||||
|
ls -l {{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}
|
||||||
|
if [ ! -f "$DEST_DIR/config.json" ]; then
|
||||||
|
ls -l {{ .Values.model.localMountPath }}
|
||||||
|
echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
|
||||||
|
wget https://hf-mirror.com/hfd/hfd.sh
|
||||||
|
chmod a+x hfd.sh
|
||||||
|
apt install aria2 -y
|
||||||
|
./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
# huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
|
||||||
|
else
|
||||||
|
echo "Model already exists at $DEST_DIR"
|
||||||
|
fi
|
||||||
|
volumeMounts:
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
containers:
|
||||||
|
- name: vllm-leader
|
||||||
|
image: {{ .Values.vllm.image }}
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
#securityContext:
|
||||||
|
# capabilities:
|
||||||
|
# add: [ "IPC_LOCK" ]
|
||||||
|
env:
|
||||||
|
- name: HUGGING_FACE_HUB_TOKEN
|
||||||
|
value: {{ .Values.vllm.huggingfaceToken }}
|
||||||
|
#- name: GLOO_SOCKET_IFNAME
|
||||||
|
# value: eth0
|
||||||
|
#- name: NCCL_SOCKET_IFNAME
|
||||||
|
# value: eth0
|
||||||
|
#- name: NCCL_IB_DISABLE
|
||||||
|
# value: "0"
|
||||||
|
#- name: NCCL_DEBUG
|
||||||
|
# value: INFO
|
||||||
|
#- name: NCCL_IB_HCA
|
||||||
|
# value: mlx5_0:1
|
||||||
|
#- name: NCCL_IB_GID_INDEX
|
||||||
|
# value: "0" # 或 "7",根据你的网络配置而定
|
||||||
|
- name: RAY_DEDUP_LOGS
|
||||||
|
value: "0"
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
|
||||||
|
echo 'Using single node ------------------------------------------';
|
||||||
|
python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.vllm.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }}"
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu: "{{ .Values.vllm.gpuLimit }}"
|
||||||
|
memory: {{ .Values.vllm.memoryLimit }}
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
#rdma/rdma_shared_device_a: 10
|
||||||
|
requests:
|
||||||
|
ephemeral-storage: 10Gi
|
||||||
|
cpu: {{ .Values.vllm.cpuRequest }}
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
readinessProbe:
|
||||||
|
#tcpSocket:
|
||||||
|
httpGet:
|
||||||
|
path: /health
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 120
|
||||||
|
periodSeconds: 20
|
||||||
|
timeoutSeconds: 5
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- name: weight-volume
|
||||||
|
mountPath: {{ .Values.model.localMountPath }}
|
||||||
|
volumes:
|
||||||
|
- name: dshm
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: {{ .Values.vllm.shmSize }}
|
||||||
|
- name: weight-volume
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: nfs-pvc-model
|
||||||
|
{{- end }}
|
||||||
58
webchat/vllm-app/values.yaml
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
# Default values for vllm-app.
|
||||||
|
# This is a YAML-formatted file.
|
||||||
|
# Declare variables to be passed into your templates.
|
||||||
|
|
||||||
|
# Model configuration
|
||||||
|
model:
|
||||||
|
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # 用户只需输入这个
|
||||||
|
localMountPath: "/Model" # PVC 固定挂载路径
|
||||||
|
huggingfaceToken: "<your-hf-token>"
|
||||||
|
download:
|
||||||
|
enabled: false # enable automatic download
|
||||||
|
image: "docker.io/vllm/vllm-openai:latest" # 包含 huggingface-cli 的镜像
|
||||||
|
|
||||||
|
# Application selection
|
||||||
|
app: "vllm"
|
||||||
|
|
||||||
|
resources:
|
||||||
|
gpuLimit: 1
|
||||||
|
cpuRequest: 12
|
||||||
|
memoryLimit: "16Gi"
|
||||||
|
shmSize: "20Gi"
|
||||||
|
|
||||||
|
# vLLM application configuration
|
||||||
|
vllm:
|
||||||
|
image: "docker.io/vllm/vllm-openai:latest"
|
||||||
|
#gpuLimit: 2
|
||||||
|
# cpuRequest: 12
|
||||||
|
# memoryLimit: "12Gi"
|
||||||
|
# shmSize: "15Gi"
|
||||||
|
|
||||||
|
llama:
|
||||||
|
image: "docker.io/library/one-click:v1"
|
||||||
|
|
||||||
|
# lmdeploy application configuration
|
||||||
|
lmdeploy:
|
||||||
|
image: "docker.io/openmmlab/lmdeploy:latest-cu12"
|
||||||
|
# gpuLimit: 2
|
||||||
|
# cpuRequest: 12
|
||||||
|
# memoryLimit: "12Gi"
|
||||||
|
# shmSize: "15Gi"
|
||||||
|
|
||||||
|
# NFS PV/PVC configuration
|
||||||
|
nfs:
|
||||||
|
server: "10.6.80.11"
|
||||||
|
path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
|
||||||
|
storageClass: "local-path"
|
||||||
|
pvSize: "500Gi"
|
||||||
|
pvcSize: "50Gi"
|
||||||
|
|
||||||
|
# LeaderWorkerSet configuration
|
||||||
|
replicaCount: 1
|
||||||
|
workerSize: 2
|
||||||
|
|
||||||
|
nodeSelector: {}
|
||||||
|
|
||||||
|
tolerations: []
|
||||||
|
|
||||||
|
affinity: {}
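# Example override at install time (a sketch; chart path and flags are placeholders based
# on this repo's layout). The release name "infer" matches the hard-coded LeaderWorkerSet
# name in lws.yaml, which the Service selector relies on:
#   helm upgrade --install infer ./webchat/vllm-app \
#     --set app=vllm \
#     --set workerSize=2 \
#     --set resources.gpuLimit=1 \
#     --set model.huggingfaceName=Qwen/Qwen2.5-0.5B-Instruct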
|
||||||
51
webui/metadata.yaml
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
|
||||||
|
application_name: &application_name webui
|
||||||
|
|
||||||
|
distributed:
|
||||||
|
method: helm
|
||||||
|
release_name: *application_name
|
||||||
|
chart: open-webui
|
||||||
|
sets:
|
||||||
|
image:
|
||||||
|
repository: ghcr.io/open-webui/open-webui
|
||||||
|
tag: main
|
||||||
|
pullPolicy: "IfNotPresent"
|
||||||
|
openaiBaseApiUrls: "http://vllm-leader-nodeport:8080:v1"
|
||||||
|
ollama:
|
||||||
|
enabled: false
|
||||||
|
service:
|
||||||
|
type: NodePort
|
||||||
|
nodePort: 30679
|
||||||
|
svc:
|
||||||
|
svc_type: NodePort
|
||||||
|
protocol: http
|
||||||
|
hostname: 10.6.14.123
|
||||||
|
servicename: ~
|
||||||
|
port: 30679
|
||||||
|
url: ~
|
||||||
|
pod:
|
||||||
|
name: open-webui-
|
||||||
|
monolithic:
|
||||||
|
method: helm
|
||||||
|
release_name: *application_name
|
||||||
|
chart: open-webui
|
||||||
|
sets:
|
||||||
|
image:
|
||||||
|
repository: ghcr.io/open-webui/open-webui
|
||||||
|
tag: main
|
||||||
|
pullPolicy: "IfNotPresent"
|
||||||
|
openaiBaseApiUrls: "http://vllm-leader-nodeport:8080:v1"
|
||||||
|
ollama:
|
||||||
|
enabled: false
|
||||||
|
service:
|
||||||
|
type: NodePort
|
||||||
|
nodePort: 30679
|
||||||
|
svc:
|
||||||
|
svc_type: NodePort
|
||||||
|
protocol: http
|
||||||
|
hostname: 10.6.14.123
|
||||||
|
servicename: ~
|
||||||
|
port: 30679
|
||||||
|
url: ~
|
||||||
|
pod:
|
||||||
|
name: open-webui-
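# The openaiBaseApiUrls value above targets the "<app>-leader-nodeport" Service created
# by webchat/vllm-app/templates/services.yaml (port 8080), assuming Open WebUI and the
# vLLM release run in the same namespace.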
|
||||||
25
webui/open-webui/.helmignore
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*.orig
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
.drone.yml
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
|
||||||
|
values-minikube.yaml
|
||||||
12
webui/open-webui/Chart.lock
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
dependencies:
|
||||||
|
- name: ollama
|
||||||
|
repository: https://otwld.github.io/ollama-helm/
|
||||||
|
version: 1.27.0
|
||||||
|
- name: pipelines
|
||||||
|
repository: https://helm.openwebui.com
|
||||||
|
version: 0.7.0
|
||||||
|
- name: tika
|
||||||
|
repository: https://apache.jfrog.io/artifactory/tika
|
||||||
|
version: 3.2.2
|
||||||
|
digest: sha256:1c6e5d6a38dc8ebb4e15b1945fb222fa57b10e8882d5c79ba430648f3c5af372
|
||||||
|
generated: "2025-08-22T15:22:03.150693+02:00"
|
||||||
38
webui/open-webui/Chart.yaml
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
annotations:
|
||||||
|
licenses: MIT
|
||||||
|
apiVersion: v2
|
||||||
|
appVersion: 0.6.26
|
||||||
|
dependencies:
|
||||||
|
- condition: ollama.enabled
|
||||||
|
import-values:
|
||||||
|
- child: service
|
||||||
|
parent: ollama.service
|
||||||
|
name: ollama
|
||||||
|
repository: https://otwld.github.io/ollama-helm/
|
||||||
|
version: '>=0.24.0'
|
||||||
|
- condition: pipelines.enabled
|
||||||
|
import-values:
|
||||||
|
- child: service
|
||||||
|
parent: pipelines.service
|
||||||
|
name: pipelines
|
||||||
|
repository: https://helm.openwebui.com
|
||||||
|
version: '>=0.0.1'
|
||||||
|
- condition: tika.enabled
|
||||||
|
name: tika
|
||||||
|
repository: https://apache.jfrog.io/artifactory/tika
|
||||||
|
version: '>=2.9.0'
|
||||||
|
description: "Open WebUI: A User-Friendly Web Interface for Chat Interactions \U0001F44B"
|
||||||
|
home: https://www.openwebui.com/
|
||||||
|
icon: https://raw.githubusercontent.com/open-webui/open-webui/main/static/favicon.png
|
||||||
|
keywords:
|
||||||
|
- llm
|
||||||
|
- chat
|
||||||
|
- web-ui
|
||||||
|
- open-webui
|
||||||
|
name: open-webui
|
||||||
|
sources:
|
||||||
|
- https://github.com/open-webui/helm-charts
|
||||||
|
- https://github.com/open-webui/open-webui/pkgs/container/open-webui
|
||||||
|
- https://github.com/otwld/ollama-helm/
|
||||||
|
- https://hub.docker.com/r/ollama/ollama
|
||||||
|
version: 7.7.0
|
||||||
270
webui/open-webui/README.md
Normal file
@ -0,0 +1,270 @@
|
|||||||
|
# open-webui
|
||||||
|
|
||||||
|
 
|
||||||
|
|
||||||
|
Open WebUI: A User-Friendly Web Interface for Chat Interactions 👋
|
||||||
|
|
||||||
|
**Homepage:** <https://www.openwebui.com/>
|
||||||
|
|
||||||
|
## Source Code
|
||||||
|
|
||||||
|
* <https://github.com/open-webui/helm-charts>
|
||||||
|
* <https://github.com/open-webui/open-webui/pkgs/container/open-webui>
|
||||||
|
* <https://github.com/otwld/ollama-helm/>
|
||||||
|
* <https://hub.docker.com/r/ollama/ollama>
|
||||||
|
|
||||||
|
## Installing
|
||||||
|
|
||||||
|
Before you can install, you need to add the `open-webui` repo to [Helm](https://helm.sh)
|
||||||
|
|
||||||
|
```shell
|
||||||
|
helm repo add open-webui https://helm.openwebui.com/
|
||||||
|
helm repo update
|
||||||
|
```
|
||||||
|
|
||||||
|
Now you can install the chart:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
helm upgrade --install open-webui open-webui/open-webui
|
||||||
|
```
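
For the NodePort/vLLM setup tracked in this repository, the same install can point Open WebUI straight at the vLLM OpenAI-compatible endpoint and skip the bundled Ollama; the flags below are an illustrative sketch based on this repo's `webui/metadata.yaml`, not part of the upstream chart documentation:

```shell
helm upgrade --install open-webui open-webui/open-webui \
  --set ollama.enabled=false \
  --set "openaiBaseApiUrls[0]=http://vllm-leader-nodeport:8080/v1" \
  --set service.type=NodePort \
  --set service.nodePort=30679
```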
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
| Repository | Name | Version |
|
||||||
|
|------------|------|---------|
|
||||||
|
| https://apache.jfrog.io/artifactory/tika | tika | >=2.9.0 |
|
||||||
|
| https://helm.openwebui.com | pipelines | >=0.0.1 |
|
||||||
|
| https://otwld.github.io/ollama-helm/ | ollama | >=0.24.0 |
|
||||||
|
|
||||||
|
## Values
|
||||||
|
|
||||||
|
### Logging configuration
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| logging.components.audio | string | `""` | Set the log level for the Audio processing component |
|
||||||
|
| logging.components.comfyui | string | `""` | Set the log level for the ComfyUI Integration component |
|
||||||
|
| logging.components.config | string | `""` | Set the log level for the Configuration Management component |
|
||||||
|
| logging.components.db | string | `""` | Set the log level for the Database Operations (Peewee) component |
|
||||||
|
| logging.components.images | string | `""` | Set the log level for the Image Generation component |
|
||||||
|
| logging.components.main | string | `""` | Set the log level for the Main Application Execution component |
|
||||||
|
| logging.components.models | string | `""` | Set the log level for the Model Management component |
|
||||||
|
| logging.components.ollama | string | `""` | Set the log level for the Ollama Backend Integration component |
|
||||||
|
| logging.components.openai | string | `""` | Set the log level for the OpenAI API Integration component |
|
||||||
|
| logging.components.rag | string | `""` | Set the log level for the Retrieval-Augmented Generation (RAG) component |
|
||||||
|
| logging.components.webhook | string | `""` | Set the log level for the Authentication Webhook component |
|
||||||
|
| logging.level | string | `""` | Set the global log level ["notset", "debug", "info" (default), "warning", "error", "critical"] |
|
||||||
|
|
||||||
|
### Azure Storage configuration
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| persistence.azure.container | string | `""` | Sets the container name for Azure Storage |
|
||||||
|
| persistence.azure.endpointUrl | string | `""` | Sets the endpoint URL for Azure Storage |
|
||||||
|
| persistence.azure.key | string | `""` | Set the access key for Azure Storage (ignored if keyExistingSecret is set). Optional - if not provided, credentials will be taken from the environment. User credentials if run locally and Managed Identity if run in Azure services |
|
||||||
|
| persistence.azure.keyExistingSecret | string | `""` | Set the access key for Azure Storage from existing secret |
|
||||||
|
| persistence.azure.keyExistingSecretKey | string | `""` | Set the access key for Azure Storage from existing secret key |
|
||||||
|
|
||||||
|
### Google Cloud Storage configuration
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| persistence.gcs.appCredentialsJson | string | `""` | Contents of Google Application Credentials JSON file (ignored if appCredentialsJsonExistingSecret is set). Optional - if not provided, credentials will be taken from the environment. User credentials if run locally and Google Metadata server if run on a Google Compute Engine. File can be generated for a service account following this guide: https://developers.google.com/workspace/guides/create-credentials#service-account |
|
||||||
|
| persistence.gcs.appCredentialsJsonExistingSecret | string | `""` | Set the Google Application Credentials JSON file for Google Cloud Storage from existing secret |
|
||||||
|
| persistence.gcs.appCredentialsJsonExistingSecretKey | string | `""` | Set the Google Application Credentials JSON file for Google Cloud Storage from existing secret key |
|
||||||
|
| persistence.gcs.bucket | string | `""` | Sets the bucket name for Google Cloud Storage. Bucket must already exist |
|
||||||
|
|
||||||
|
### Amazon S3 Storage configuration
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| persistence.s3.accessKey | string | `""` | Sets the access key ID for S3 storage |
|
||||||
|
| persistence.s3.accessKeyExistingAccessKey | string | `""` | Set the secret access key for S3 storage from existing k8s secret key |
|
||||||
|
| persistence.s3.accessKeyExistingSecret | string | `""` | Set the secret access key for S3 storage from existing k8s secret |
|
||||||
|
| persistence.s3.bucket | string | `""` | Sets the bucket name for S3 storage |
|
||||||
|
| persistence.s3.endpointUrl | string | `""` | Sets the endpoint url for S3 storage |
|
||||||
|
| persistence.s3.keyPrefix | string | `""` | Sets the key prefix for a S3 object |
|
||||||
|
| persistence.s3.region | string | `""` | Sets the region name for S3 storage |
|
||||||
|
| persistence.s3.secretKey | string | `""` | Sets the secret access key for S3 storage (ignored if secretKeyExistingSecret is set) |
|
||||||
|
| persistence.s3.secretKeyExistingSecret | string | `""` | Set the secret key for S3 storage from existing k8s secret |
|
||||||
|
| persistence.s3.secretKeyExistingSecretKey | string | `""` | Set the secret key for S3 storage from existing k8s secret key |
|
||||||
|
|
||||||
|
### SSO Configuration
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| sso.enableGroupManagement | bool | `false` | Enable OAuth group management through access token groups claim |
|
||||||
|
| sso.enableRoleManagement | bool | `false` | Enable OAuth role management through access token roles claim |
|
||||||
|
| sso.enableSignup | bool | `false` | Enable account creation when logging in with OAuth (distinct from regular signup) |
|
||||||
|
| sso.enabled | bool | `false` | **Enable SSO authentication globally** must enable to use SSO authentication |
|
||||||
|
| sso.groupManagement.groupsClaim | string | `"groups"` | The claim that contains the groups (can be nested, e.g., user.memberOf) |
|
||||||
|
| sso.mergeAccountsByEmail | bool | `false` | Allow logging into accounts that match email from OAuth provider (considered insecure) |
|
||||||
|
|
||||||
|
### GitHub OAuth configuration
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| sso.github.clientExistingSecret | string | `""` | GitHub OAuth client secret from existing secret |
|
||||||
|
| sso.github.clientExistingSecretKey | string | `""` | GitHub OAuth client secret key from existing secret |
|
||||||
|
| sso.github.clientId | string | `""` | GitHub OAuth client ID |
|
||||||
|
| sso.github.clientSecret | string | `""` | GitHub OAuth client secret (ignored if clientExistingSecret is set) |
|
||||||
|
| sso.github.enabled | bool | `false` | Enable GitHub OAuth |
|
||||||
|
|
||||||
|
### Google OAuth configuration
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| sso.google.clientExistingSecret | string | `""` | Google OAuth client secret from existing secret |
|
||||||
|
| sso.google.clientExistingSecretKey | string | `""` | Google OAuth client secret key from existing secret |
|
||||||
|
| sso.google.clientId | string | `""` | Google OAuth client ID |
|
||||||
|
| sso.google.clientSecret | string | `""` | Google OAuth client secret (ignored if clientExistingSecret is set) |
|
||||||
|
| sso.google.enabled | bool | `false` | Enable Google OAuth |
|
||||||
|
|
||||||
|
### Microsoft OAuth configuration
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| sso.microsoft.clientExistingSecret | string | `""` | Microsoft OAuth client secret from existing secret |
|
||||||
|
| sso.microsoft.clientExistingSecretKey | string | `""` | Microsoft OAuth client secret key from existing secret |
|
||||||
|
| sso.microsoft.clientId | string | `""` | Microsoft OAuth client ID |
|
||||||
|
| sso.microsoft.clientSecret | string | `""` | Microsoft OAuth client secret (ignored if clientExistingSecret is set) |
|
||||||
|
| sso.microsoft.enabled | bool | `false` | Enable Microsoft OAuth |
|
||||||
|
| sso.microsoft.tenantId | string | `""` | Microsoft tenant ID - use 9188040d-6c67-4c5b-b112-36a304b66dad for personal accounts |
|
||||||
|
|
||||||
|
### OIDC configuration
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| sso.oidc.clientExistingSecret | string | `""` | OIDC client secret from existing secret |
|
||||||
|
| sso.oidc.clientExistingSecretKey | string | `""` | OIDC client secret key from existing secret |
|
||||||
|
| sso.oidc.clientId | string | `""` | OIDC client ID |
|
||||||
|
| sso.oidc.clientSecret | string | `""` | OIDC client secret (ignored if clientExistingSecret is set) |
|
||||||
|
| sso.oidc.enabled | bool | `false` | Enable OIDC authentication |
|
||||||
|
| sso.oidc.providerName | string | `"SSO"` | Name of the provider to show on the UI |
|
||||||
|
| sso.oidc.providerUrl | string | `""` | OIDC provider well known URL |
|
||||||
|
| sso.oidc.scopes | string | `"openid email profile"` | Scopes to request (space-separated). |
|
||||||
|
|
||||||
|
### Role management configuration
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| sso.roleManagement.adminRoles | string | `""` | Comma-separated list of roles allowed to log in as admin (receive open webui role admin) |
|
||||||
|
| sso.roleManagement.allowedRoles | string | `""` | Comma-separated list of roles allowed to log in (receive open webui role user) |
|
||||||
|
| sso.roleManagement.rolesClaim | string | `"roles"` | The claim that contains the roles (can be nested, e.g., user.roles) |
|
||||||
|
|
||||||
|
### SSO trusted header authentication
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| sso.trustedHeader.emailHeader | string | `""` | Header containing the user's email address |
|
||||||
|
| sso.trustedHeader.enabled | bool | `false` | Enable trusted header authentication |
|
||||||
|
| sso.trustedHeader.nameHeader | string | `""` | Header containing the user's name (optional, used for new user creation) |
|
||||||
|
|
||||||
|
### Other Values
|
||||||
|
|
||||||
|
| Key | Type | Default | Description |
|
||||||
|
|-----|------|---------|-------------|
|
||||||
|
| affinity | object | `{}` | Affinity for pod assignment |
|
||||||
|
| annotations | object | `{}` | |
|
||||||
|
| args | list | `[]` | Open WebUI container arguments (overrides default) |
|
||||||
|
| clusterDomain | string | `"cluster.local"` | Value of cluster domain |
|
||||||
|
| command | list | `[]` | Open WebUI container command (overrides default entrypoint) |
|
||||||
|
| commonEnvVars | list | `[]` | Env vars added to the Open WebUI deployment, common across environments. Most up-to-date environment variables can be found here: https://docs.openwebui.com/getting-started/env-configuration/ (caution: environment variables defined in both `extraEnvVars` and `commonEnvVars` will result in a conflict. Avoid duplicates) |
|
||||||
|
| containerSecurityContext | object | `{}` | Configure container security context ref: <https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-containe> |
|
||||||
|
| copyAppData.args | list | `[]` | Open WebUI copy-app-data init container arguments (overrides default) |
|
||||||
|
| copyAppData.command | list | `[]` | Open WebUI copy-app-data init container command (overrides default) |
|
||||||
|
| copyAppData.resources | object | `{}` | |
|
||||||
|
| databaseUrl | string | `""` | Configure database URL, needed to work with Postgres (example: `postgresql://<user>:<password>@<service>:<port>/<database>`), leave empty to use the default sqlite database |
|
||||||
|
| enableOpenaiApi | bool | `true` | Enables the use of OpenAI APIs |
|
||||||
|
| extraEnvFrom | list | `[]` | Env vars added from configmap or secret to the Open WebUI deployment. Most up-to-date environment variables can be found here: https://docs.openwebui.com/getting-started/env-configuration/ (caution: `extraEnvVars` will take precedence over the value from `extraEnvFrom`) |
|
||||||
|
| extraEnvVars | list | `[{"name":"OPENAI_API_KEY","value":"0p3n-w3bu!"}]` | Env vars added to the Open WebUI deployment. Most up-to-date environment variables can be found here: https://docs.openwebui.com/getting-started/env-configuration/ |
|
||||||
|
| extraEnvVars[0] | object | `{"name":"OPENAI_API_KEY","value":"0p3n-w3bu!"}` | Default API key value for Pipelines. Should be updated in a production deployment, or be changed to the required API key if not using Pipelines |
|
||||||
|
| extraInitContainers | list | `[]` | Additional init containers to add to the deployment/statefulset ref: <https://kubernetes.io/docs/concepts/workloads/pods/init-containers/> |
|
||||||
|
| extraResources | list | `[]` | Extra resources to deploy with Open WebUI |
|
||||||
|
| hostAliases | list | `[]` | HostAliases to be added to hosts-file of each container |
|
||||||
|
| image | object | `{"pullPolicy":"IfNotPresent","repository":"ghcr.io/open-webui/open-webui","tag":""}` | Open WebUI image tags can be found here: https://github.com/open-webui/open-webui |
|
||||||
|
| imagePullSecrets | list | `[]` | Configure imagePullSecrets to use private registry ref: <https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry> |
|
||||||
|
| ingress.additionalHosts | list | `[]` | |
|
||||||
|
| ingress.annotations | object | `{}` | Use appropriate annotations for your Ingress controller, e.g., for NGINX: |
|
||||||
|
| ingress.class | string | `""` | |
|
||||||
|
| ingress.enabled | bool | `false` | |
|
||||||
|
| ingress.existingSecret | string | `""` | |
|
||||||
|
| ingress.extraLabels | object | `{}` | Additional custom labels to add to the Ingress metadata Useful for tagging, selecting, or applying policies to the Ingress via labels. |
|
||||||
|
| ingress.host | string | `"chat.example.com"` | |
|
||||||
|
| ingress.tls | bool | `false` | |
|
||||||
|
| livenessProbe | object | `{}` | Probe for liveness of the Open WebUI container ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes> |
|
||||||
|
| managedCertificate.domains[0] | string | `"chat.example.com"` | |
|
||||||
|
| managedCertificate.enabled | bool | `false` | |
|
||||||
|
| managedCertificate.name | string | `"mydomain-chat-cert"` | |
|
||||||
|
| nameOverride | string | `""` | |
|
||||||
|
| namespaceOverride | string | `""` | |
|
||||||
|
| nodeSelector | object | `{}` | Node labels for pod assignment. |
|
||||||
|
| ollama.enabled | bool | `true` | Automatically install Ollama Helm chart from https://otwld.github.io/ollama-helm/. Use [Helm Values](https://github.com/otwld/ollama-helm/#helm-values) to configure |
|
||||||
|
| ollama.fullnameOverride | string | `"open-webui-ollama"` | If enabling embedded Ollama, update fullnameOverride to your desired Ollama name value, or else it will use the default ollama.name value from the Ollama chart |
|
||||||
|
| ollamaUrls | list | `[]` | A list of Ollama API endpoints. These can be added in lieu of automatically installing the Ollama Helm chart, or in addition to it. |
|
||||||
|
| ollamaUrlsFromExtraEnv | bool | `false` | Disables taking Ollama Urls from `ollamaUrls` list |
|
||||||
|
| openaiBaseApiUrl | string | `"https://api.openai.com/v1"` | OpenAI base API URL to use. Defaults to the Pipelines service endpoint when Pipelines are enabled, and "https://api.openai.com/v1" if Pipelines are not enabled and this value is blank |
|
||||||
|
| openaiBaseApiUrls | list | `[]` | OpenAI base API URLs to use. Overwrites the value in openaiBaseApiUrl if set |
|
||||||
|
| persistence.accessModes | list | `["ReadWriteOnce"]` | If using multiple replicas, you must update accessModes to ReadWriteMany |
|
||||||
|
| persistence.annotations | object | `{}` | |
|
||||||
|
| persistence.enabled | bool | `true` | |
|
||||||
|
| persistence.existingClaim | string | `""` | Use existingClaim if you want to re-use an existing Open WebUI PVC instead of creating a new one |
|
||||||
|
| persistence.provider | string | `"local"` | Sets the storage provider, available values are `local`, `s3`, `gcs` or `azure` |
|
||||||
|
| persistence.selector | object | `{}` | |
|
||||||
|
| persistence.size | string | `"2Gi"` | |
|
||||||
|
| persistence.storageClass | string | `""` | |
|
||||||
|
| persistence.subPath | string | `""` | Subdirectory of Open WebUI PVC to mount. Useful if root directory is not empty. |
|
||||||
|
| pipelines.enabled | bool | `true` | Automatically install Pipelines chart to extend Open WebUI functionality using Pipelines: https://github.com/open-webui/pipelines |
|
||||||
|
| pipelines.extraEnvVars | list | `[]` | This section can be used to pass required environment variables to your pipelines (e.g. Langfuse hostname) |
|
||||||
|
| podAnnotations | object | `{}` | |
|
||||||
|
| podLabels | object | `{}` | |
|
||||||
|
| podSecurityContext | object | `{}` | Configure pod security context ref: <https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container> |
|
||||||
|
| priorityClassName | string | `""` | Priority class name for the Open WebUI pods |
|
||||||
|
| readinessProbe | object | `{}` | Probe for readiness of the Open WebUI container ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes> |
|
||||||
|
| replicaCount | int | `1` | |
|
||||||
|
| resources | object | `{}` | |
|
||||||
|
| revisionHistoryLimit | int | `10` | Revision history limit for the workload manager (deployment). |
|
||||||
|
| runtimeClassName | string | `""` | Configure runtime class ref: <https://kubernetes.io/docs/concepts/containers/runtime-class/> |
|
||||||
|
| service | object | `{"annotations":{},"containerPort":8080,"labels":{},"loadBalancerClass":"","nodePort":"","port":80,"type":"ClusterIP"}` | Service values to expose Open WebUI pods to cluster |
|
||||||
|
| serviceAccount.annotations | object | `{}` | |
|
||||||
|
| serviceAccount.automountServiceAccountToken | bool | `false` | |
|
||||||
|
| serviceAccount.enable | bool | `true` | |
|
||||||
|
| serviceAccount.name | string | `""` | |
|
||||||
|
| startupProbe | object | `{}` | Probe for startup of the Open WebUI container ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes> |
|
||||||
|
| strategy | object | `{}` | Strategy for updating the workload manager: deployment or statefulset |
|
||||||
|
| tika.enabled | bool | `false` | Automatically install Apache Tika to extend Open WebUI |
|
||||||
|
| tolerations | list | `[]` | Tolerations for pod assignment |
|
||||||
|
| topologySpreadConstraints | list | `[]` | Topology Spread Constraints for pod assignment |
|
||||||
|
| volumeMounts | object | `{"container":[],"initContainer":[]}` | Configure container volume mounts ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-volume-storage/> |
|
||||||
|
| volumes | list | `[]` | Configure pod volumes ref: <https://kubernetes.io/docs/tasks/configure-pod-container/configure-volume-storage/> |
|
||||||
|
| websocket.enabled | bool | `false` | Enables websocket support in Open WebUI with env `ENABLE_WEBSOCKET_SUPPORT` |
|
||||||
|
| websocket.manager | string | `"redis"` | Specifies the websocket manager to use with env `WEBSOCKET_MANAGER`: redis (default) |
|
||||||
|
| websocket.nodeSelector | object | `{}` | Node selector for websocket pods |
|
||||||
|
| websocket.redis | object | `{"affinity":{},"annotations":{},"args":[],"command":[],"enabled":true,"image":{"pullPolicy":"IfNotPresent","repository":"redis","tag":"7.4.2-alpine3.21"},"labels":{},"name":"open-webui-redis","pods":{"annotations":{},"labels":{}},"resources":{},"securityContext":{},"service":{"annotations":{},"containerPort":6379,"labels":{},"nodePort":"","port":6379,"portName":"http","type":"ClusterIP"},"tolerations":[]}` | Deploys a redis |
|
||||||
|
| websocket.redis.affinity | object | `{}` | Redis affinity for pod assignment |
|
||||||
|
| websocket.redis.annotations | object | `{}` | Redis annotations |
|
||||||
|
| websocket.redis.args | list | `[]` | Redis arguments (overrides default) |
|
||||||
|
| websocket.redis.command | list | `[]` | Redis command (overrides default) |
|
||||||
|
| websocket.redis.enabled | bool | `true` | Enable redis installation |
|
||||||
|
| websocket.redis.image | object | `{"pullPolicy":"IfNotPresent","repository":"redis","tag":"7.4.2-alpine3.21"}` | Redis image |
|
||||||
|
| websocket.redis.labels | object | `{}` | Redis labels |
|
||||||
|
| websocket.redis.name | string | `"open-webui-redis"` | Redis name |
|
||||||
|
| websocket.redis.pods | object | `{"annotations":{},"labels":{}}` | Redis pod |
|
||||||
|
| websocket.redis.pods.annotations | object | `{}` | Redis pod annotations |
|
||||||
|
| websocket.redis.pods.labels | object | `{}` | Redis pod labels |
|
||||||
|
| websocket.redis.resources | object | `{}` | Redis resources |
|
||||||
|
| websocket.redis.securityContext | object | `{}` | Redis security context |
|
||||||
|
| websocket.redis.service | object | `{"annotations":{},"containerPort":6379,"labels":{},"nodePort":"","port":6379,"portName":"http","type":"ClusterIP"}` | Redis service |
|
||||||
|
| websocket.redis.service.annotations | object | `{}` | Redis service annotations |
|
||||||
|
| websocket.redis.service.containerPort | int | `6379` | Redis container/target port |
|
||||||
|
| websocket.redis.service.labels | object | `{}` | Redis service labels |
|
||||||
|
| websocket.redis.service.nodePort | string | `""` | Redis service node port. Valid only when type is `NodePort` |
|
||||||
|
| websocket.redis.service.port | int | `6379` | Redis service port |
|
||||||
|
| websocket.redis.service.portName | string | `"http"` | Redis service port name. Istio needs this to be something like `tcp-redis` |
|
||||||
|
| websocket.redis.service.type | string | `"ClusterIP"` | Redis service type |
|
||||||
|
| websocket.redis.tolerations | list | `[]` | Redis tolerations for pod assignment |
|
||||||
|
| websocket.url | string | `"redis://open-webui-redis:6379/0"` | Specifies the URL of the Redis instance for websocket communication. Template with `redis://[:<password>@]<hostname>:<port>/<db>` |
|
||||||
|
|
||||||
|
----------------------------------------------
|
||||||
|
|
||||||
|
Autogenerated from chart metadata using [helm-docs](https://github.com/norwoodj/helm-docs/).
|
||||||
36
webui/open-webui/README.md.gotmpl
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
{{ template "chart.header" . }}
|
||||||
|
|
||||||
|
{{ template "chart.deprecationWarning" . }}
|
||||||
|
|
||||||
|
{{ template "chart.badgesSection" . }}
|
||||||
|
|
||||||
|
{{ template "chart.description" . }}
|
||||||
|
|
||||||
|
{{ template "chart.homepageLine" . }}
|
||||||
|
|
||||||
|
{{ template "chart.maintainersSection" . }}
|
||||||
|
|
||||||
|
{{ template "chart.sourcesSection" . }}
|
||||||
|
|
||||||
|
## Installing
|
||||||
|
|
||||||
|
Before you can install, you need to add the `open-webui` repo to [Helm](https://helm.sh)
|
||||||
|
|
||||||
|
```shell
|
||||||
|
helm repo add open-webui https://helm.openwebui.com/
|
||||||
|
helm repo update
|
||||||
|
```
|
||||||
|
|
||||||
|
Now you can install the chart:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
helm upgrade --install open-webui open-webui/open-webui
|
||||||
|
```
|
||||||
|
|
||||||
|
{{ template "chart.requirementsSection" . }}
|
||||||
|
|
||||||
|
{{ template "chart.valuesSection" . }}
|
||||||
|
|
||||||
|
----------------------------------------------
|
||||||
|
|
||||||
|
Autogenerated from chart metadata using [helm-docs](https://github.com/norwoodj/helm-docs/).
|
||||||
30
webui/open-webui/charts/ollama/.helmignore
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*.orig
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
.drone.yml
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
|
||||||
|
|
||||||
|
#others
|
||||||
|
.github
|
||||||
|
kind-config.yml
|
||||||
|
ci/
|
||||||
|
|
||||||
30
webui/open-webui/charts/ollama/.ollama-helm/.helmignore
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
# Patterns to ignore when building packages.
|
||||||
|
# This supports shell glob matching, relative path matching, and
|
||||||
|
# negation (prefixed with !). Only one pattern per line.
|
||||||
|
.DS_Store
|
||||||
|
# Common VCS dirs
|
||||||
|
.git/
|
||||||
|
.gitignore
|
||||||
|
.bzr/
|
||||||
|
.bzrignore
|
||||||
|
.hg/
|
||||||
|
.hgignore
|
||||||
|
.svn/
|
||||||
|
# Common backup files
|
||||||
|
*.swp
|
||||||
|
*.bak
|
||||||
|
*.tmp
|
||||||
|
*.orig
|
||||||
|
*~
|
||||||
|
# Various IDEs
|
||||||
|
.project
|
||||||
|
.idea/
|
||||||
|
.drone.yml
|
||||||
|
*.tmproj
|
||||||
|
.vscode/
|
||||||
|
|
||||||
|
#others
|
||||||
|
.github
|
||||||
|
kind-config.yml
|
||||||
|
ci/
|
||||||
|
|
||||||
33
webui/open-webui/charts/ollama/.ollama-helm/Chart.yaml
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
apiVersion: v2
|
||||||
|
name: ollama
|
||||||
|
description: Get up and running with large language models locally.
|
||||||
|
|
||||||
|
type: application
|
||||||
|
|
||||||
|
version: 1.27.0
|
||||||
|
|
||||||
|
appVersion: "0.11.4"
|
||||||
|
|
||||||
|
annotations:
|
||||||
|
artifacthub.io/category: ai-machine-learning
|
||||||
|
artifacthub.io/changes: |
|
||||||
|
- kind: changed
|
||||||
|
description: upgrade app version to 0.11.4
|
||||||
|
links:
|
||||||
|
- name: Ollama release v0.11.4
|
||||||
|
url: https://github.com/ollama/ollama/releases/tag/v0.11.4
|
||||||
|
|
||||||
|
kubeVersion: "^1.16.0-0"
|
||||||
|
home: https://ollama.ai/
|
||||||
|
icon: https://ollama.ai/public/ollama.png
|
||||||
|
keywords:
|
||||||
|
- ai
|
||||||
|
- llm
|
||||||
|
- llama
|
||||||
|
- mistral
|
||||||
|
sources:
|
||||||
|
- https://github.com/ollama/ollama
|
||||||
|
- https://github.com/otwld/ollama-helm
|
||||||
|
maintainers:
|
||||||
|
- name: OTWLD
|
||||||
|
email: contact@otwld.com
|
||||||
21
webui/open-webui/charts/ollama/.ollama-helm/LICENSE
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2024 OTWLD
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
331
webui/open-webui/charts/ollama/.ollama-helm/README.md
Normal file
@ -0,0 +1,331 @@
|
|||||||
|

|
||||||
|
|
||||||
|

|
||||||
|
[](https://artifacthub.io/packages/helm/ollama-helm/ollama)
|
||||||
|
[](https://github.com/otwld/ollama-helm/actions/workflows/ci.yaml)
|
||||||
|
[](https://discord.gg/U24mpqTynB)
|
||||||
|
|
||||||
|
[Ollama](https://ollama.ai/), get up and running with large language models, locally.
|
||||||
|
|
||||||
|
This Community Chart is for deploying [Ollama](https://github.com/ollama/ollama).
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- Kubernetes: `>= 1.16.0-0` for **CPU only**
|
||||||
|
|
||||||
|
- Kubernetes: `>= 1.26.0-0` for **GPU** stable support (NVIDIA and AMD)
|
||||||
|
|
||||||
|
*Not all GPUs are currently supported with ollama (especially with AMD)*
|
||||||
|
|
||||||
|
## Deploying Ollama chart
|
||||||
|
|
||||||
|
To install the `ollama` chart in the `ollama` namespace:
|
||||||
|
|
||||||
|
> [!IMPORTANT]
|
||||||
|
> We are migrating the registry from https://otwld.github.io/ollama-helm/ url to OTWLD Helm central
|
||||||
|
> registry https://helm.otwld.com/
|
||||||
|
> Please update your Helm registry accordingly.
|
||||||
|
|
||||||
|
```console
|
||||||
|
helm repo add otwld https://helm.otwld.com/
|
||||||
|
helm repo update
|
||||||
|
helm install ollama otwld/ollama --namespace ollama --create-namespace
|
||||||
|
```
|
||||||
|
|
||||||
|
## Upgrading Ollama chart
|
||||||
|
|
||||||
|
First please read the [release notes](https://github.com/ollama/ollama/releases) of Ollama to make sure there are no
|
||||||
|
backwards incompatible changes.
|
||||||
|
|
||||||
|
Make adjustments to your values as needed, then run `helm upgrade`:
|
||||||
|
|
||||||
|
```console
|
||||||
|
# -- This pulls the latest version of the ollama chart from the repo.
|
||||||
|
helm repo update
|
||||||
|
helm upgrade ollama otwld/ollama --namespace ollama --values values.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
## Uninstalling Ollama chart
|
||||||
|
|
||||||
|
To uninstall/delete the `ollama` deployment in the `ollama` namespace:
|
||||||
|
|
||||||
|
```console
|
||||||
|
helm delete ollama --namespace ollama
|
||||||
|
```
|
||||||
|
|
||||||
|
Substitute your values if they differ from the examples. See `helm delete --help` for a full reference on `delete`
|
||||||
|
parameters and flags.
|
||||||
|
|
||||||
|
## Interact with Ollama
|
||||||
|
|
||||||
|
- **Ollama documentation can be found [HERE](https://github.com/ollama/ollama/tree/main/docs)**
|
||||||
|
- Interact with RESTful API: [Ollama API](https://github.com/ollama/ollama/blob/main/docs/api.md)
|
||||||
|
- Interact with official clients libraries: [ollama-js](https://github.com/ollama/ollama-js#custom-client)
|
||||||
|
and [ollama-python](https://github.com/ollama/ollama-python#custom-client)
|
||||||
|
- Interact with langchain: [langchain-js](https://github.com/ollama/ollama/blob/main/docs/tutorials/langchainjs.md)
|
||||||
|
and [langchain-python](https://github.com/ollama/ollama/blob/main/docs/tutorials/langchainpy.md)
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
- **It's highly recommended to run an updated version of Kubernetes for deploying ollama with GPU**
|
||||||
|
|
||||||
|
### Basic values.yaml example with GPU and two models pulled at startup
|
||||||
|
|
||||||
|
```
|
||||||
|
ollama:
|
||||||
|
gpu:
|
||||||
|
# -- Enable GPU integration
|
||||||
|
enabled: true
|
||||||
|
|
||||||
|
# -- GPU type: 'nvidia' or 'amd'
|
||||||
|
type: 'nvidia'
|
||||||
|
|
||||||
|
# -- Specify the number of GPU to 1
|
||||||
|
number: 1
|
||||||
|
|
||||||
|
# -- List of models to pull at container startup
|
||||||
|
models:
|
||||||
|
pull:
|
||||||
|
- mistral
|
||||||
|
- llama2
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Basic values.yaml example with Ingress
|
||||||
|
|
||||||
|
```
|
||||||
|
ollama:
|
||||||
|
models:
|
||||||
|
pull:
|
||||||
|
- llama2
|
||||||
|
|
||||||
|
ingress:
|
||||||
|
enabled: true
|
||||||
|
hosts:
|
||||||
|
- host: ollama.domain.lan
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
```
|
||||||
|
|
||||||
|
- *API is now reachable at `ollama.domain.lan`*
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Create and run model from template
|
||||||
|
|
||||||
|
```
|
||||||
|
ollama:
|
||||||
|
models:
|
||||||
|
create:
|
||||||
|
- name: llama3.1-ctx32768
|
||||||
|
template: |
|
||||||
|
FROM llama3.1
|
||||||
|
PARAMETER num_ctx 32768
|
||||||
|
run:
|
||||||
|
- llama3.1-ctx32768
|
||||||
|
```

## Upgrading from 0.X.X to 1.X.X

Version 1.X.X introduces the ability to load models in memory at startup, and the values structure has changed.

Please change `ollama.models` to `ollama.models.pull` to avoid errors before upgrading:

```yaml
ollama:
  models:
    - mistral
    - llama2
```

To:

```yaml
ollama:
  models:
    pull:
      - mistral
      - llama2
```
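
Once your values file uses the new structure, the upgrade itself follows the usual pattern, assuming the same release, repo, and namespace names as earlier in this document:

```console
helm repo update
helm upgrade ollama otwld/ollama --namespace ollama --values values.yaml
```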

## Helm Values

- See [values.yaml](values.yaml) to see the Chart's default values.

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| affinity | object | `{}` | Affinity for pod assignment |
| autoscaling.enabled | bool | `false` | Enable autoscaling |
| autoscaling.maxReplicas | int | `100` | Number of maximum replicas |
| autoscaling.minReplicas | int | `1` | Number of minimum replicas |
| autoscaling.targetCPUUtilizationPercentage | int | `80` | CPU usage to target replica |
| deployment.labels | object | `{}` | Labels to add to the deployment |
| extraArgs | list | `[]` | Additional arguments on the output Deployment definition. |
| extraEnv | list | `[]` | Additional environment variables on the output Deployment definition. For extra OLLAMA env, please refer to https://github.com/ollama/ollama/blob/main/envconfig/config.go |
| extraEnvFrom | list | `[]` | Additional environment variables from external sources (like ConfigMap) |
| extraObjects | list | `[]` | Extra K8s manifests to deploy |
| fullnameOverride | string | `""` | String to fully override template |
| hostIPC | bool | `false` | Use the host's IPC namespace. |
| hostNetwork | bool | `false` | Use the host's network namespace. |
| hostPID | bool | `false` | Use the host's PID namespace |
| image.pullPolicy | string | `"IfNotPresent"` | Docker pull policy |
| image.repository | string | `"ollama/ollama"` | Docker image registry |
| image.tag | string | `""` | Docker image tag, overrides the image tag whose default is the chart appVersion. |
| imagePullSecrets | list | `[]` | Docker registry secret names as an array |
| ingress.annotations | object | `{}` | Additional annotations for the Ingress resource. |
| ingress.className | string | `""` | IngressClass that will be used to implement the Ingress (Kubernetes 1.18+) |
| ingress.enabled | bool | `false` | Enable ingress controller resource |
| ingress.hosts[0].host | string | `"ollama.local"` | |
| ingress.hosts[0].paths[0].path | string | `"/"` | |
| ingress.hosts[0].paths[0].pathType | string | `"Prefix"` | |
| ingress.tls | list | `[]` | The tls configuration for hostnames to be covered with this ingress record. |
| initContainers | list | `[]` | Init containers to add to the pod |
| knative.annotations | object | `{}` | Knative service annotations |
| knative.containerConcurrency | int | `0` | Knative service container concurrency |
| knative.enabled | bool | `false` | Enable Knative integration |
| knative.idleTimeoutSeconds | int | `300` | Knative service idle timeout seconds |
| knative.responseStartTimeoutSeconds | int | `300` | Knative service response start timeout seconds |
| knative.timeoutSeconds | int | `300` | Knative service timeout seconds |
| lifecycle | object | `{}` | Lifecycle for pod assignment (override ollama.models startup pull/run) |
| livenessProbe.enabled | bool | `true` | Enable livenessProbe |
| livenessProbe.failureThreshold | int | `6` | Failure threshold for livenessProbe |
| livenessProbe.initialDelaySeconds | int | `60` | Initial delay seconds for livenessProbe |
| livenessProbe.path | string | `"/"` | Request path for livenessProbe |
| livenessProbe.periodSeconds | int | `10` | Period seconds for livenessProbe |
| livenessProbe.successThreshold | int | `1` | Success threshold for livenessProbe |
| livenessProbe.timeoutSeconds | int | `5` | Timeout seconds for livenessProbe |
| nameOverride | string | `""` | String to partially override template (will maintain the release name) |
| namespaceOverride | string | `""` | String to fully override namespace |
| nodeSelector | object | `{}` | Node labels for pod assignment. |
| ollama.gpu.draDriverClass | string | `"gpu.nvidia.com"` | DRA GPU DriverClass |
| ollama.gpu.draEnabled | bool | `false` | Enable DRA GPU integration. If enabled, it will use DRA instead of the Device Driver Plugin and create a ResourceClaim and GpuClaimParameters |
| ollama.gpu.draExistingClaimTemplate | string | `""` | Existing DRA GPU ResourceClaim Template |
| ollama.gpu.enabled | bool | `false` | Enable GPU integration |
| ollama.gpu.mig.devices | object | `{}` | Specify the MIG devices and the corresponding number |
| ollama.gpu.mig.enabled | bool | `false` | Enable multiple MIG devices. If enabled, you will have to specify the MIG devices. If set to false, this section is ignored |
| ollama.gpu.number | int | `1` | Specify the number of GPUs. If you use the MIG section below, this parameter is ignored |
| ollama.gpu.nvidiaResource | string | `"nvidia.com/gpu"` | Only for nvidia cards; change to (for example) 'nvidia.com/mig-1g.10gb' to use a MIG slice |
| ollama.gpu.type | string | `"nvidia"` | GPU type: 'nvidia' or 'amd'. If 'ollama.gpu.enabled', the default value is nvidia. If set to 'amd', this will add the 'rocm' suffix to the image tag if 'image.tag' is not overridden. This is because AMD and CPU/CUDA are different images |
| ollama.insecure | bool | `false` | Add insecure flag for pulling at container startup |
| ollama.models.clean | bool | `false` | Automatically remove models present on the disk but not specified in the values file |
| ollama.models.create | list | `[]` | List of models to create at container startup; there are two options: 1. Create a raw model 2. Load a model from ConfigMaps (ConfigMaps must be created beforehand and are loaded as a volume in the "/models" directory). create: - name: llama3.1-ctx32768 configMapRef: my-configmap configMapKeyRef: configmap-key - name: llama3.1-ctx32768 template: | FROM llama3.1 PARAMETER num_ctx 32768 |
| ollama.models.pull | list | `[]` | List of models to pull at container startup. The more you add, the longer the container will take to start if models are not present. pull: - llama2 - mistral |
| ollama.models.run | list | `[]` | List of models to load in memory at container startup. run: - llama2 - mistral |
| ollama.mountPath | string | `""` | Override ollama-data volume mount path, default: "/root/.ollama" |
| ollama.port | int | `11434` | |
| persistentVolume.accessModes | list | `["ReadWriteOnce"]` | Ollama server data Persistent Volume access modes. Must match those of existing PV or dynamic provisioner. Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ |
| persistentVolume.annotations | object | `{}` | Ollama server data Persistent Volume annotations |
| persistentVolume.enabled | bool | `false` | Enable persistence using PVC |
| persistentVolume.existingClaim | string | `""` | If you'd like to bring your own PVC for persisting Ollama state, pass the name of the created + ready PVC here. If set, this Chart will not create the default PVC. Requires server.persistentVolume.enabled: true |
| persistentVolume.size | string | `"30Gi"` | Ollama server data Persistent Volume size |
| persistentVolume.storageClass | string | `""` | Ollama server data Persistent Volume Storage Class. If defined, storageClassName: <storageClass>. If set to "-", storageClassName: "", which disables dynamic provisioning. If undefined (the default) or set to null, no storageClassName spec is set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS & OpenStack) |
| persistentVolume.subPath | string | `""` | Subdirectory of Ollama server data Persistent Volume to mount. Useful if the volume's root directory is not empty |
| persistentVolume.volumeMode | string | `""` | Ollama server data Persistent Volume Binding Mode. If defined, volumeMode: <volumeMode>. If empty (the default) or set to null, no volumeBindingMode spec is set, choosing the default mode. |
| persistentVolume.volumeName | string | `""` | Pre-existing PV to attach this claim to. Useful if a CSI auto-provisions a PV for you and you want to always reference the PV moving forward |
| podAnnotations | object | `{}` | Map of annotations to add to the pods |
| podLabels | object | `{}` | Map of labels to add to the pods |
| podSecurityContext | object | `{}` | Pod Security Context |
| priorityClassName | string | `""` | Priority Class Name |
| readinessProbe.enabled | bool | `true` | Enable readinessProbe |
| readinessProbe.failureThreshold | int | `6` | Failure threshold for readinessProbe |
| readinessProbe.initialDelaySeconds | int | `30` | Initial delay seconds for readinessProbe |
| readinessProbe.path | string | `"/"` | Request path for readinessProbe |
| readinessProbe.periodSeconds | int | `5` | Period seconds for readinessProbe |
| readinessProbe.successThreshold | int | `1` | Success threshold for readinessProbe |
| readinessProbe.timeoutSeconds | int | `3` | Timeout seconds for readinessProbe |
| replicaCount | int | `1` | Number of replicas |
| resources.limits | object | `{}` | Pod limit |
| resources.requests | object | `{}` | Pod requests |
| runtimeClassName | string | `""` | Specify runtime class |
| securityContext | object | `{}` | Container Security Context |
| service.annotations | object | `{}` | Annotations to add to the service |
| service.labels | object | `{}` | Labels to add to the service |
| service.loadBalancerIP | string | `nil` | Load Balancer IP address |
| service.nodePort | int | `31434` | Service node port when service type is 'NodePort' |
| service.port | int | `11434` | Service port |
| service.type | string | `"ClusterIP"` | Service type |
| serviceAccount.annotations | object | `{}` | Annotations to add to the service account |
| serviceAccount.automount | bool | `true` | Automatically mount a ServiceAccount's API credentials? |
| serviceAccount.create | bool | `true` | Specifies whether a service account should be created |
| serviceAccount.name | string | `""` | The name of the service account to use. If not set and create is true, a name is generated using the fullname template |
| terminationGracePeriodSeconds | int | `120` | Wait for a grace period |
| tests.annotations | object | `{}` | Annotations to add to the tests |
| tests.enabled | bool | `true` | |
| tests.labels | object | `{}` | Labels to add to the tests |
| tolerations | list | `[]` | Tolerations for pod assignment |
| topologySpreadConstraints | object | `{}` | Topology Spread Constraints for pod assignment |
| updateStrategy.type | string | `"Recreate"` | Deployment strategy can be "Recreate" or "RollingUpdate". Default is Recreate |
| volumeMounts | list | `[]` | Additional volumeMounts on the output Deployment definition. |
| volumes | list | `[]` | Additional volumes on the output Deployment definition. |
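
Any of the keys above can also be overridden on the command line instead of through a values file. A sketch with an illustrative combination of flags (not a recommendation), using the same repo alias and namespace as the earlier examples:

```console
helm install ollama otwld/ollama \
  --namespace ollama --create-namespace \
  --set replicaCount=1 \
  --set service.type=NodePort \
  --set service.nodePort=31434 \
  --set persistentVolume.enabled=true \
  --set persistentVolume.size=50Gi
```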

----------------------------------------------

## Core team

<table>
  <tr>
    <td align="center">
      <a href="https://github.com/jdetroyes"
        ><img
          src="https://github.com/jdetroyes.png?size=200"
          width="50"
          style="margin-bottom: -4px; border-radius: 8px;"
          alt="Jean Baptiste Detroyes"
        /><br /><b> Jean Baptiste Detroyes </b></a
      >
      <div style="margin-top: 4px">
        <a href="https://github.com/jdetroyes" title="Github"
          ><img
            width="16"
            src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/github.svg"
        /></a>
        <a
          href="mailto:jdetroyes@otwld.com"
          title="Email"
          ><img
            width="16"
            src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/send.svg"
        /></a>
      </div>
    </td>
    <td align="center">
      <a href="https://github.com/ntrehout"
        ><img
          src="https://github.com/ntrehout.png?size=200"
          width="50"
          style="margin-bottom: -4px; border-radius: 8px;"
          alt="Nathan Tréhout"
        /><br /><b> Nathan Tréhout </b></a
      >
      <div style="margin-top: 4px">
        <a href="https://x.com/n_trehout" title="Twitter"
          ><img
            width="16"
            src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/twitter.svg"
        /></a>
        <a href="https://github.com/ntrehout" title="Github"
          ><img
            width="16"
            src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/github.svg"
        /></a>
        <a
          href="mailto:ntrehout@otwld.com"
          title="Email"
          ><img
            width="16"
            src="https://raw.githubusercontent.com/MarsiBarsi/readme-icons/main/send.svg"
        /></a>
      </div>
    </td>
  </tr>
</table>

## Support

- For questions, suggestions, and discussion about Ollama please refer to
  the [Ollama issue page](https://github.com/ollama/ollama/issues)
- For questions, suggestions, and discussion about this chart please
  visit the [Ollama-Helm issue page](https://github.com/otwld/ollama-helm/issues) or join
  our [OTWLD Discord](https://discord.gg/U24mpqTynB)
@ -0,0 +1,25 @@
1. Get the application URL by running these commands:
{{- if .Values.knative.enabled }}
  export KSERVICE_URL=$(kubectl get ksvc --namespace {{ .Release.Namespace }} {{ include "ollama.fullname" . }} -o jsonpath={.status.url})
  echo "Visit $KSERVICE_URL to use your application"
{{- else if .Values.ingress.enabled }}
{{- range $host := .Values.ingress.hosts }}
  {{- range .paths }}
  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
  {{- end }}
{{- end }}
{{- else if contains "NodePort" .Values.service.type }}
  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "ollama.fullname" . }})
  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
  echo http://$NODE_IP:$NODE_PORT
{{- else if contains "LoadBalancer" .Values.service.type }}
     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
           You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "ollama.fullname" . }}'
  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "ollama.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
  echo http://$SERVICE_IP:{{ .Values.service.port }}
{{- else if contains "ClusterIP" .Values.service.type }}
  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "ollama.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
  echo "Visit http://127.0.0.1:8080 to use your application"
  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
{{- end }}
@ -0,0 +1,80 @@
{{/*
Allow the release namespace to be overridden for multi-namespace deployments in combined charts
*/}}
{{- define "ollama.namespace" -}}
{{- if .Values.namespaceOverride -}}
{{- .Values.namespaceOverride -}}
{{- else -}}
{{- .Release.Namespace -}}
{{- end -}}
{{- end -}}

{{/*
Expand the name of the chart.
*/}}
{{- define "ollama.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "ollama.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "ollama.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
*/}}
{{- define "ollama.labels" -}}
helm.sh/chart: {{ include "ollama.chart" . }}
{{ include "ollama.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
*/}}
{{- define "ollama.selectorLabels" -}}
app.kubernetes.io/name: {{ include "ollama.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use
*/}}
{{- define "ollama.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "ollama.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}

{{/*
Models mount path
*/}}
{{- define "ollama.modelsMountPath" -}}
{{- printf "%s/models" (((.Values).ollama).mountPath | default "/root/.ollama") }}
{{- end -}}
Some files were not shown because too many files have changed in this diff