From e8451c067588976860a8fc7e4f5d978c97daa5e7 Mon Sep 17 00:00:00 2001 From: Ivan087 Date: Mon, 17 Nov 2025 16:50:28 +0800 Subject: [PATCH] chore(repo): init helm-charts repo with two charts and automated package/push --- .env.example | 16 + .gitea/workflows/helm-publish.yml | 28 ++ .gitignore | 11 + README.md | 47 +++ code-server-chart/Chart.yaml | 23 ++ code-server-chart/README.md | 2 + code-server-chart/templates/NOTES.txt | 24 ++ code-server-chart/templates/_helpers.tpl | 63 ++++ code-server-chart/templates/deployment.yaml | 190 ++++++++++ code-server-chart/templates/ingress.yaml | 63 ++++ code-server-chart/templates/pvc.yaml | 29 ++ code-server-chart/templates/secrets.yaml | 20 + code-server-chart/templates/service.yaml | 25 ++ .../templates/serviceaccount.yaml | 11 + .../templates/tests/test-connection.yaml | 18 + code-server-chart/values.schema.json | 93 +++++ code-server-chart/values.yaml | 226 ++++++++++++ scripts/helm_publish.sh | 90 +++++ vllm-serve/.helmignore | 23 ++ vllm-serve/Chart.yaml | 8 + vllm-serve/templates/NOTES.txt | 16 + vllm-serve/templates/_helpers.tpl | 62 ++++ vllm-serve/templates/lws.yaml | 193 ++++++++++ vllm-serve/templates/nfs-pvc.yaml | 40 ++ vllm-serve/templates/services.yaml | 35 ++ vllm-serve/templates/single.yaml | 127 +++++++ vllm-serve/values.schema.json | 346 ++++++++++++++++++ vllm-serve/values.yaml | 76 ++++ 28 files changed, 1905 insertions(+) create mode 100644 .env.example create mode 100644 .gitea/workflows/helm-publish.yml create mode 100644 .gitignore create mode 100644 README.md create mode 100644 code-server-chart/Chart.yaml create mode 100644 code-server-chart/README.md create mode 100644 code-server-chart/templates/NOTES.txt create mode 100644 code-server-chart/templates/_helpers.tpl create mode 100644 code-server-chart/templates/deployment.yaml create mode 100644 code-server-chart/templates/ingress.yaml create mode 100644 code-server-chart/templates/pvc.yaml create mode 100644 
code-server-chart/templates/secrets.yaml create mode 100644 code-server-chart/templates/service.yaml create mode 100644 code-server-chart/templates/serviceaccount.yaml create mode 100644 code-server-chart/templates/tests/test-connection.yaml create mode 100644 code-server-chart/values.schema.json create mode 100644 code-server-chart/values.yaml create mode 100755 scripts/helm_publish.sh create mode 100644 vllm-serve/.helmignore create mode 100644 vllm-serve/Chart.yaml create mode 100644 vllm-serve/templates/NOTES.txt create mode 100644 vllm-serve/templates/_helpers.tpl create mode 100644 vllm-serve/templates/lws.yaml create mode 100644 vllm-serve/templates/nfs-pvc.yaml create mode 100644 vllm-serve/templates/services.yaml create mode 100644 vllm-serve/templates/single.yaml create mode 100644 vllm-serve/values.schema.json create mode 100644 vllm-serve/values.yaml diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..941c866 --- /dev/null +++ b/.env.example @@ -0,0 +1,16 @@ +# Required: Helm OCI namespace to push to (host/org or host/user) +# Examples: ghcr.io/your-org, gitea.example.com/your-user +HELM_OCI_NAMESPACE= + +# Optional: Registry credentials (if not already logged in via `helm registry login`) +HELM_USERNAME= +HELM_PASSWORD= + +# Optional: Space separated chart dirs. 
If unset, script auto-discovers +# CHART_DIRS="code-server-chart vllm-serve" + +# Optional: Set to 1 to only build locally without pushing +# DRY_RUN=1 + +# Optional: Extra flags for helm push +# HELM_PUSH_EXTRA_ARGS="--insecure-skip-tls-verify" diff --git a/.gitea/workflows/helm-publish.yml b/.gitea/workflows/helm-publish.yml new file mode 100644 index 0000000..4e5a604 --- /dev/null +++ b/.gitea/workflows/helm-publish.yml @@ -0,0 +1,28 @@ +name: Helm Publish + +on: + push: + branches: + - "**" + +jobs: + package-and-push: + runs-on: ubuntu-latest + env: + HELM_OCI_NAMESPACE: ${{ secrets.HELM_OCI_NAMESPACE }} + HELM_USERNAME: ${{ secrets.HELM_USERNAME }} + HELM_PASSWORD: ${{ secrets.HELM_PASSWORD }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Helm + uses: azure/setup-helm@v4 + with: + token: ${{ github.token }} + version: v3.12.3 + + - name: Package and Push Charts + run: | + set -euo pipefail + bash scripts/helm_publish.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6c191b0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +# Helm artifacts +*.tgz +**/.packages/ + +# Local env +.env + +# OS/editor +.DS_Store +.idea/ +.vscode/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..44bfc95 --- /dev/null +++ b/README.md @@ -0,0 +1,47 @@ +# helm-charts repo + +This repository manages two Helm charts (e.g., `code-server-chart`, `vllm-serve`) and automates packaging and publishing on every `git push`. + +## How it works + +- Local pre-push hook runs `scripts/helm_publish.sh` to `helm package` and `helm push` all charts found under this folder (directories containing a `Chart.yaml`). +- A Gitea Actions workflow at `.gitea/workflows/helm-publish.yml` does the same on the server side for each push. + +## Configure registry + +1. Copy `.env.example` to `.env` and fill in: + +``` +HELM_OCI_NAMESPACE=gitea.example.com/your-user +HELM_USERNAME=your-username +HELM_PASSWORD=your-token-or-password +``` + +2. 
For CI, add these as repository secrets in Gitea: + - `HELM_OCI_NAMESPACE` + - `HELM_USERNAME` + - `HELM_PASSWORD` + +The charts are pushed to `oci://$HELM_OCI_NAMESPACE` (Helm appends the chart name and version). + +## Chart discovery + +No configuration needed by default. The script auto-discovers chart directories by looking for `Chart.yaml` up to depth 2 (excluding nested `charts/` vendor dir). + +Optionally pin which chart directories to process: + +``` +export CHART_DIRS="code-server-chart vllm-serve" +``` + +## Run manually + +``` +./scripts/helm_publish.sh +``` + +Use `DRY_RUN=1` to only package locally: + +``` +DRY_RUN=1 ./scripts/helm_publish.sh +``` diff --git a/code-server-chart/Chart.yaml b/code-server-chart/Chart.yaml new file mode 100644 index 0000000..96e0aed --- /dev/null +++ b/code-server-chart/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +name: code-server +description: A Helm chart for coder/code-server + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.0.1 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. 
+appVersion: 4.103.2 diff --git a/code-server-chart/README.md b/code-server-chart/README.md new file mode 100644 index 0000000..9e0191d --- /dev/null +++ b/code-server-chart/README.md @@ -0,0 +1,2 @@ +# code-server-chart + diff --git a/code-server-chart/templates/NOTES.txt b/code-server-chart/templates/NOTES.txt new file mode 100644 index 0000000..45c9aed --- /dev/null +++ b/code-server-chart/templates/NOTES.txt @@ -0,0 +1,24 @@ +1. Get the application URL by running these commands: +{{- if .Values.ingress.enabled }} +{{- range $host := .Values.ingress.hosts }} + {{- range .paths }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ . }} + {{- end }} +{{- end }} +{{- else if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "code-server.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "code-server.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "code-server.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + echo "Visit http://127.0.0.1:8080 to use your application" + kubectl port-forward --namespace {{ .Release.Namespace }} service/{{ include "code-server.fullname" . }} 8080:http +{{- end }} + +Administrator credentials: + + Password: echo $(kubectl get secret --namespace {{ .Release.Namespace }} {{ template "code-server.fullname" . 
}} -o jsonpath="{.data.password}" | base64 --decode) diff --git a/code-server-chart/templates/_helpers.tpl b/code-server-chart/templates/_helpers.tpl new file mode 100644 index 0000000..bb36e8c --- /dev/null +++ b/code-server-chart/templates/_helpers.tpl @@ -0,0 +1,63 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "code-server.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "code-server.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "code-server.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Common labels +*/}} +{{- define "code-server.labels" -}} +helm.sh/chart: {{ include "code-server.chart" . }} +{{ include "code-server.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "code-server.selectorLabels" -}} +app.kubernetes.io/name: {{ include "code-server.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "code-server.serviceAccountName" -}} +{{- if .Values.serviceAccount.create -}} + {{ default (include "code-server.fullname" .) .Values.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.serviceAccount.name }} +{{- end -}} +{{- end -}} diff --git a/code-server-chart/templates/deployment.yaml b/code-server-chart/templates/deployment.yaml new file mode 100644 index 0000000..b0bde86 --- /dev/null +++ b/code-server-chart/templates/deployment.yaml @@ -0,0 +1,190 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "code-server.fullname" . }} + labels: + app.kubernetes.io/name: {{ include "code-server.name" . }} + helm.sh/chart: {{ include "code-server.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} +spec: + replicas: {{ .Values.replicaCount | default 1 }} + strategy: + type: Recreate + selector: + matchLabels: + app.kubernetes.io/name: {{ include "code-server.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + template: + metadata: + labels: + app.kubernetes.io/name: {{ include "code-server.name" . 
}} + app.kubernetes.io/instance: {{ .Release.Name }} + {{- if .Values.podAnnotations }} + annotations: {{- toYaml .Values.podAnnotations | nindent 8 }} + {{- end }} + spec: + imagePullSecrets: {{- toYaml .Values.imagePullSecrets | nindent 8 }} + {{- if .Values.hostnameOverride }} + hostname: {{ .Values.hostnameOverride }} + {{- end }} + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName }} + {{- end }} + {{- if .Values.securityContext.enabled }} + securityContext: + fsGroup: {{ .Values.securityContext.fsGroup }} + {{- end }} + {{- if or (and .Values.volumePermissions.enabled .Values.persistence.enabled) .Values.extraInitContainers }} + initContainers: + {{- if and .Values.volumePermissions.enabled .Values.persistence.enabled }} + - name: init-chmod-data + image: busybox:latest + imagePullPolicy: IfNotPresent + command: + - sh + - -c + - | + chown -R {{ .Values.securityContext.runAsUser }}:{{ .Values.securityContext.fsGroup }} /home/coder + securityContext: + runAsUser: {{ .Values.volumePermissions.securityContext.runAsUser }} + volumeMounts: + - name: data + mountPath: /home/coder + {{- end }} +{{- if .Values.extraInitContainers }} +{{ tpl .Values.extraInitContainers . | indent 6}} +{{- end }} + {{- end }} + containers: +{{- if .Values.extraContainers }} +{{ tpl .Values.extraContainers . 
| indent 8}} +{{- end }} + - name: {{ .Chart.Name }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- if .Values.securityContext.enabled }} + securityContext: + runAsUser: {{ .Values.securityContext.runAsUser }} + {{- end }} + {{- if .Values.lifecycle.enabled }} + lifecycle: + {{- if .Values.lifecycle.postStart }} + postStart: + {{ toYaml .Values.lifecycle.postStart | nindent 14 }} + {{- end }} + {{- if .Values.lifecycle.preStop }} + preStop: + {{ toYaml .Values.lifecycle.preStop | nindent 14 }} + {{- end }} + {{- end }} + env: + {{- if .Values.extraVars }} +{{ toYaml .Values.extraVars | indent 10 }} + {{- end }} + - name: PASSWORD + valueFrom: + secretKeyRef: + {{- if .Values.existingSecret }} + name: {{ .Values.existingSecret }} + {{- else }} + name: {{ template "code-server.fullname" . }} + {{- end }} + key: password + {{- if .Values.extraArgs }} + args: +{{ toYaml .Values.extraArgs | indent 10 }} + {{- end }} + volumeMounts: + - name: data + mountPath: /home/coder + {{- range .Values.extraConfigmapMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + subPath: {{ .subPath | default "" }} + readOnly: {{ .readOnly }} + {{- end }} + {{- range .Values.extraSecretMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + subPath: {{ .subPath | default "" }} + readOnly: {{ .readOnly }} + {{- end }} + {{- range .Values.extraVolumeMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + subPath: {{ .subPath | default "" }} + readOnly: {{ .readOnly }} + {{- end }} + ports: + - name: http + containerPort: 8080 + protocol: TCP + {{- range .Values.extraPorts }} + - name: {{ .name }} + containerPort: {{ .port }} + protocol: {{ .protocol }} + {{- end }} + livenessProbe: + httpGet: + path: / + port: http + readinessProbe: + httpGet: + path: / + port: http + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- tpl . $ | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ template "code-server.serviceAccountName" . }} + volumes: + - name: data + {{- if .Values.persistence.enabled }} + {{- if not .Values.persistence.hostPath }} + persistentVolumeClaim: + claimName: {{ .Values.persistence.existingClaim | default (include "code-server.fullname" .) }} + {{- else }} + hostPath: + path: {{ .Values.persistence.hostPath }} + type: Directory + {{- end -}} + {{- else }} + emptyDir: {} + {{- end -}} + {{- range .Values.extraSecretMounts }} + - name: {{ .name }} + secret: + secretName: {{ .secretName }} + defaultMode: {{ .defaultMode }} + {{- end }} + {{- range .Values.extraConfigmapMounts }} + - name: {{ .name }} + configMap: + name: {{ .configMap }} + defaultMode: {{ .defaultMode }} + {{- end }} + {{- range .Values.extraVolumeMounts }} + - name: {{ .name }} + {{- if .existingClaim }} + persistentVolumeClaim: + claimName: {{ .existingClaim }} + {{- else if .hostPath }} + hostPath: + path: {{ .hostPath }} + type: Directory + {{- else }} + emptyDir: + {{- toYaml .emptyDir | nindent 10 }} + {{- end }} + {{- end }} diff --git a/code-server-chart/templates/ingress.yaml b/code-server-chart/templates/ingress.yaml new file mode 100644 index 0000000..1da4320 --- /dev/null +++ b/code-server-chart/templates/ingress.yaml @@ -0,0 +1,63 @@ +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "code-server.fullname" . 
-}} +{{- $svcPort := .Values.service.port -}} +{{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1 +{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1beta1 +{{- else -}} +apiVersion: extensions/v1beta1 +{{- end }} +kind: Ingress +metadata: + name: {{ $fullName }} + labels: + {{- include "code-server.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if .Values.ingress.ingressClassName }} + ingressClassName: {{ .Values.ingress.ingressClassName }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion -}} + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ . }} + pathType: Prefix + backend: + service: + name: {{ $fullName }} + port: + number: {{ $svcPort }} + {{- end }} + {{- end }} + {{- else -}} + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ . }} + backend: + serviceName: {{ $fullName }} + servicePort: {{ $svcPort }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/code-server-chart/templates/pvc.yaml b/code-server-chart/templates/pvc.yaml new file mode 100644 index 0000000..2f1c874 --- /dev/null +++ b/code-server-chart/templates/pvc.yaml @@ -0,0 +1,29 @@ +{{- if and (and .Values.persistence.enabled (not .Values.persistence.existingClaim)) (not .Values.persistence.hostPath) }} +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: {{ include "code-server.fullname" . 
}} + namespace: {{ .Release.Namespace }} +{{- with .Values.persistence.annotations }} + annotations: +{{ toYaml . | indent 4 }} +{{- end }} + labels: + app.kubernetes.io/name: {{ include "code-server.name" . }} + helm.sh/chart: {{ include "code-server.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} +spec: + accessModes: + - {{ .Values.persistence.accessMode | quote }} + resources: + requests: + storage: {{ .Values.persistence.size | quote }} +{{- if .Values.persistence.storageClass }} +{{- if (eq "-" .Values.persistence.storageClass) }} + storageClassName: "" +{{- else }} + storageClassName: "{{ .Values.persistence.storageClass }}" +{{- end }} +{{- end }} +{{- end }} diff --git a/code-server-chart/templates/secrets.yaml b/code-server-chart/templates/secrets.yaml new file mode 100644 index 0000000..ae59be4 --- /dev/null +++ b/code-server-chart/templates/secrets.yaml @@ -0,0 +1,20 @@ +{{- if not .Values.existingSecret }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "code-server.fullname" . }} + annotations: + "helm.sh/hook": "pre-install" + labels: + app.kubernetes.io/name: {{ include "code-server.name" . }} + helm.sh/chart: {{ include "code-server.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} +type: Opaque +data: + {{- if .Values.password }} + password: "{{ .Values.password | b64enc }}" + {{- else }} + password: "{{ randAlphaNum 24 | b64enc }}" + {{- end }} +{{- end }} diff --git a/code-server-chart/templates/service.yaml b/code-server-chart/templates/service.yaml new file mode 100644 index 0000000..d5a3c5e --- /dev/null +++ b/code-server-chart/templates/service.yaml @@ -0,0 +1,25 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "code-server.fullname" . }} + labels: + app.kubernetes.io/name: {{ include "code-server.name" . }} + helm.sh/chart: {{ include "code-server.chart" . 
}} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + {{- range .Values.extraPorts }} + - port: {{ .port }} + targetPort: {{ .port }} + protocol: {{ .protocol }} + name: {{ .name }} + {{- end }} + selector: + app.kubernetes.io/name: {{ include "code-server.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} diff --git a/code-server-chart/templates/serviceaccount.yaml b/code-server-chart/templates/serviceaccount.yaml new file mode 100644 index 0000000..df9e1e3 --- /dev/null +++ b/code-server-chart/templates/serviceaccount.yaml @@ -0,0 +1,11 @@ +{{- if or .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/name: {{ include "code-server.name" . }} + helm.sh/chart: {{ include "code-server.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} + name: {{ template "code-server.serviceAccountName" . }} +{{- end -}} diff --git a/code-server-chart/templates/tests/test-connection.yaml b/code-server-chart/templates/tests/test-connection.yaml new file mode 100644 index 0000000..2e67f56 --- /dev/null +++ b/code-server-chart/templates/tests/test-connection.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "code-server.fullname" . }}-test-connection" + labels: + app.kubernetes.io/name: {{ include "code-server.name" . }} + helm.sh/chart: {{ include "code-server.chart" . }} + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/managed-by: {{ .Release.Service }} + annotations: + "helm.sh/hook": test-success +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "code-server.fullname" . 
}}:{{ .Values.service.port }}'] + restartPolicy: Never diff --git a/code-server-chart/values.schema.json b/code-server-chart/values.schema.json new file mode 100644 index 0000000..680a6cf --- /dev/null +++ b/code-server-chart/values.schema.json @@ -0,0 +1,93 @@ +{ + "type": "object", + "required": ["password"], + "properties": { + "image": { + "type": "object", + "description": "Container image configuration.", + "properties": { + "repository": { + "type": "string", + "description": "Container image repository.", + "default": "codercom/code-server", + "examples": ["codercom/code-server"] + }, + "tag":{ + "type": "string", + "description": "Container image tag.", + "default": "4.103.2" + } + } + }, + "resources": { + "type": "object", + "description": "Container resource requests and limits, including CPU, Memory, and GPU.", + "properties": { + "limits": { + "type": "object", + "description": "Resource limits.", + "properties": { + "cpu": { + "type": "string" + }, + "memory": { + "type": "string" + }, + "nvidia.com/gpu": { + "type": "integer", + "description": "Number of NVIDIA GPUs (integer).", + "default": 0, + "minimum": 0 + } + }, + "additionalProperties": true + }, + "requests": { + "type": "object", + "description": "Resource requests.", + "properties": { + "cpu": { + "type": "string" + }, + "memory": { + "type": "string" + } + }, + "additionalProperties": true + } + } + }, + "password": { + "type": "string", + "description": "Password for the admin user.", + "examples": ["password"] + }, + "persistence": { + "type": "object", + "description": "Persistent volume configuration.", + "properties": { + "storageClass": { + "type": "string", + "description": "Storage class name.", + "default": "standard", + "enum": ["nas-nfs","csi-cephfs-sc","sl-sc"], + "examples": ["nas-nfs","csi-cephfs-sc"] + }, + "size": { + "type": "string", + "description": "Persistent volume size.", + "default": "10Gi", + "examples": ["10Gi","20Gi"] + }, + "existingClaim": { + "type": 
"string", + "description": "Existing persistent volume claim name.", + "examples": ["pvc-12345678-1234-1234-1234-123456789012"] + } + } + }, + "nodeSelector": { + "type": "object" + } + } +} diff --git a/code-server-chart/values.yaml b/code-server-chart/values.yaml new file mode 100644 index 0000000..8e0f563 --- /dev/null +++ b/code-server-chart/values.yaml @@ -0,0 +1,226 @@ +# Default values for code-server. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 + +image: + repository: codercom/code-server + tag: '4.103.2' + # repository: ghcr.io/kubeflow/kubeflow/notebook-servers/codeserver + # tag: "latest" + pullPolicy: IfNotPresent + +# Specifies one or more secrets to be used when pulling images from a +# private container repository +# https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry +imagePullSecrets: [] +# - name: registry-creds + +nameOverride: "" +fullnameOverride: "" +hostnameOverride: "" + +# The existing secret to use for code-server authentication in the frontend. the password is stored in the secret under the key `password` +# existingSecret: "" +password: "" +serviceAccount: + # Specifies whether a service account should be created + create: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. 
+ # If not set and create is true, a name is generated using the fullname template + name: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +priorityClassName: "" + +service: + type: LoadBalancer + port: 80 + +ingress: + enabled: false + #annotations: + # kubernetes.io/tls-acme: "true" + #hosts: + # - host: code-server.example.loc + # paths: + # - / + ingressClassName: "" + #tls: + # - secretName: code-server + # hosts: + # - code-server.example.loc + +# Optional additional arguments +extraArgs: [] + # These are the arguments normally passed to code-server; run + # code-server --help for a list of available options. + # + # Each argument and parameter must have its own entry; if you use + # --param value on the command line, then enter it here as: + # + # - --param + # - value + # + # If you receive an error like "Unknown option --param value", it may be + # because both the parameter and value are specified as a single argument, + # rather than two separate arguments (e.g. "- --param value" on a line). + +# Optional additional environment variables +extraVars: [] +# - name: DISABLE_TELEMETRY +# value: "true" +# if dind is desired: +# - name: DOCKER_HOST +# value: "tcp://localhost:2376" + +## +## Init containers parameters: +## volumePermissions: Change the owner of the persist volume mountpoint to RunAsUser:fsGroup +## +volumePermissions: + enabled: false + securityContext: + runAsUser: 0 + +## Pod Security Context +## ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ +## +securityContext: + enabled: true + fsGroup: 1000 + runAsUser: 1000 + +resources: + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ limits: + cpu: 4000m + memory: 8192Mi + nvidia.com/gpu: "1" + requests: + cpu: 2000m + memory: 4000Mi + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +## Persist data to a persistent volume +persistence: + enabled: true + ## code-server data Persistent Volume Storage Class + ## If defined, storageClassName: + ## If set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or set to null, no storageClassName spec is + ## set, choosing the default provisioner. (gp2 on AWS, standard on + ## GKE, AWS & OpenStack) + ## + storageClass: "nas-nfs" + accessMode: ReadWriteOnce + size: 10Gi + annotations: {} + existingClaim: "" + # hostPath: /data + +lifecycle: + enabled: false + # postStart: + # exec: + # command: + # - /bin/bash + # - -c + # - curl -s -L SOME_SCRIPT | bash + + # for dind, the following may be helpful + # postStart: + # exec: + # command: + # - /bin/sh + # - -c + # - | + # sudo apt-get update \ + # && sudo apt-get install -y docker.io + +## Enable an Specify container in extraContainers. +## This is meant to allow adding code-server dependencies, like docker-dind. 
+extraContainers: | +# If docker-dind is used, DOCKER_HOST env is mandatory to set in "extraVars" +# - name: docker-dind +# image: docker:28.3.2-dind +# imagePullPolicy: IfNotPresent +# resources: +# requests: +# cpu: 1 +# ephemeral-storage: "50Gi" +# memory: 10Gi +# securityContext: +# privileged: true +# procMount: Default +# env: +# - name: DOCKER_TLS_CERTDIR +# value: "" # disable TLS setup +# command: +# - dockerd +# - --host=unix:///var/run/docker.sock +# - --host=tcp://0.0.0.0:2376 + + +extraInitContainers: | +# - name: customization +# image: {{ .Values.image.repository }}:{{ .Values.image.tag }} +# imagePullPolicy: IfNotPresent +# env: +# - name: SERVICE_URL +# value: https://open-vsx.org/vscode/gallery +# - name: ITEM_URL +# value: https://open-vsx.org/vscode/item +# command: +# - sh +# - -c +# - | +# code-server --install-extension ms-python.python +# code-server --install-extension golang.Go +# volumeMounts: +# - name: data +# mountPath: /home/coder + +## Additional code-server secret mounts +extraSecretMounts: [] + # - name: secret-files + # mountPath: /etc/secrets + # subPath: private.key # (optional) + # secretName: code-server-secret-files + # readOnly: true + +## Additional code-server volume mounts +extraVolumeMounts: [] + # - name: extra-volume + # mountPath: /mnt/volume + # readOnly: true + # existingClaim: volume-claim + # hostPath: "" + # emptyDir: {} + +extraConfigmapMounts: [] + # - name: certs-configmap + # mountPath: /etc/code-server/ssl/ + # subPath: certificates.crt # (optional) + # configMap: certs-configmap + # readOnly: true + +extraPorts: [] + # - name: minecraft + # port: 25565 + # protocol: tcp diff --git a/scripts/helm_publish.sh b/scripts/helm_publish.sh new file mode 100755 index 0000000..1f6e362 --- /dev/null +++ b/scripts/helm_publish.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Package and push all Helm charts found under the repo +# Required env: +# HELM_OCI_NAMESPACE e.g. 
ghcr.io/OWNER or gitea.example.com/OWNER +# Optional env: +# HELM_USERNAME / HELM_PASSWORD for registry login +# CHART_DIRS: space-separated list of chart directories; if empty, auto-discover +# DRY_RUN=1: only package, do not push +# HELM_PUSH_EXTRA_ARGS: extra flags for `helm push` (e.g., --insecure-skip-tls-verify) + +ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)" +cd "$ROOT_DIR" + +if [[ -z "${HELM_OCI_NAMESPACE:-}" ]]; then + echo "[helm_publish] HELM_OCI_NAMESPACE not set (e.g., ghcr.io/owner or gitea.example.com/owner)." >&2 + exit 1 +fi + +# Derive registry host from HELM_OCI_NAMESPACE +HELM_REGISTRY_HOST="${HELM_OCI_NAMESPACE%%/*}" + +# Auto-discover charts when CHART_DIRS not provided +if [[ -z "${CHART_DIRS:-}" ]]; then + mapfile -t chart_paths < <(find . -maxdepth 2 -type f -name Chart.yaml -not -path "*/charts/*" | sort) + if [[ ${#chart_paths[@]} -eq 0 ]]; then + echo "[helm_publish] No charts found (no Chart.yaml)." >&2 + exit 1 + fi + CHART_DIRS="" + for p in "${chart_paths[@]}"; do + d="$(dirname "$p")" + CHART_DIRS+=" ${d#./}" + done +fi + +# Login if credentials present +if [[ -n "${HELM_USERNAME:-}" && -n "${HELM_PASSWORD:-}" ]]; then + echo "[helm_publish] Logging into registry ${HELM_REGISTRY_HOST} as ${HELM_USERNAME}" + helm registry login "$HELM_REGISTRY_HOST" -u "$HELM_USERNAME" -p "$HELM_PASSWORD" +else + echo "[helm_publish] HELM_USERNAME/HELM_PASSWORD not set; assuming registry creds already configured" +fi + +status=0 +for chart_dir in ${CHART_DIRS}; do + if [[ ! -f "$chart_dir/Chart.yaml" ]]; then + echo "[helm_publish] Skip $chart_dir (no Chart.yaml)" + continue + fi + echo "[helm_publish] Processing chart: $chart_dir" + + # Ensure dependencies are built + if [[ -f "$chart_dir/Chart.yaml" ]]; then + helm dependency build "$chart_dir" || true + fi + + pkg_out_dir="$chart_dir/.packages" + mkdir -p "$pkg_out_dir" + + # Lint chart (non-fatal) + if ! 
helm lint "$chart_dir"; then + echo "[helm_publish] Warning: helm lint failed for $chart_dir" + fi + + # Package chart + pkg_path=$(helm package "$chart_dir" --destination "$pkg_out_dir" | awk '{print $NF}') + if [[ ! -f "$pkg_path" ]]; then + echo "[helm_publish] Failed to package $chart_dir" >&2 + status=1 + continue + fi + echo "[helm_publish] Packaged: $pkg_path" + + if [[ "${DRY_RUN:-}" == "1" ]]; then + echo "[helm_publish] DRY_RUN enabled; skip push for $pkg_path" + continue + fi + + # Push to OCI registry; Helm will use chart name from the package + echo "[helm_publish] Pushing $pkg_path to oci://$HELM_OCI_NAMESPACE" + if ! helm push ${HELM_PUSH_EXTRA_ARGS:-} "$pkg_path" "oci://$HELM_OCI_NAMESPACE"; then + echo "[helm_publish] Push failed for $pkg_path" >&2 + status=1 + fi + +done + +exit $status diff --git a/vllm-serve/.helmignore b/vllm-serve/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/vllm-serve/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/vllm-serve/Chart.yaml b/vllm-serve/Chart.yaml new file mode 100644 index 0000000..a27dca4 --- /dev/null +++ b/vllm-serve/Chart.yaml @@ -0,0 +1,8 @@ +annotations: + helm.sh/resource-policy: keep +apiVersion: v2 +appVersion: 1.16.0 +description: A Helm chart for deploying vLLM with NFS storage +name: vllm-serve +type: application +version: 0.2.0 diff --git a/vllm-serve/templates/NOTES.txt b/vllm-serve/templates/NOTES.txt new file mode 100644 index 0000000..9b881b5 --- /dev/null +++ b/vllm-serve/templates/NOTES.txt @@ -0,0 +1,16 @@ +1. 
Get the application URL by running these commands: +{{- if contains "NodePort" .Values.svc.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "vllm-serve.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.svc.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "vllm-serve.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "vllm-serve.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") + echo http://$SERVICE_IP:{{ .Values.svc.port }} +{{- else if contains "ClusterIP" .Values.svc.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "vllm-serve.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + echo "Visit http://127.0.0.1:8080 to use your application" + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT +{{- end }} diff --git a/vllm-serve/templates/_helpers.tpl b/vllm-serve/templates/_helpers.tpl new file mode 100644 index 0000000..eb21024 --- /dev/null +++ b/vllm-serve/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "vllm-serve.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. 
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "vllm-serve.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "vllm-serve.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "vllm-serve.labels" -}} +helm.sh/chart: {{ include "vllm-serve.chart" . }} +{{ include "vllm-serve.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "vllm-serve.selectorLabels" -}} +app.kubernetes.io/name: {{ include "vllm-serve.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "vllm-serve.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "vllm-serve.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/vllm-serve/templates/lws.yaml b/vllm-serve/templates/lws.yaml new file mode 100644 index 0000000..84e3b1e --- /dev/null +++ b/vllm-serve/templates/lws.yaml @@ -0,0 +1,193 @@ +{{- if gt (int .Values.workerSize) 1 }} +apiVersion: leaderworkerset.x-k8s.io/v1 +kind: LeaderWorkerSet +metadata: + name: {{ .Release.Name }} +spec: + replicas: {{ .Values.replicaCount }} + leaderWorkerTemplate: + size: {{ .Values.workerSize }} + restartPolicy: RecreateGroupOnPodRestart + leaderTemplate: + metadata: + labels: + role: leader + spec: + initContainers: + # 模型下载作为第一个 initContainer + - name: download-model + image: {{ .Values.model.download.image }} + imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }} + env: + - name: HF_ENDPOINT + value: https://hf-mirror.com + - name: HUGGING_FACE_HUB_TOKEN + value: {{ .Values.model.huggingfaceToken }} + command: + - sh + - -c + - | + MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}") + DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME" + # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}" + # 检查模型是否存在,不存在则下载 + echo "DEST_DIR= $DEST_DIR" + if [ ! 
-f "$DEST_DIR/config.json" ]; then
+              ls -l {{ .Values.model.localMountPath }}
+              echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
+              wget https://hf-mirror.com/hfd/hfd.sh
+              chmod a+x hfd.sh
+              apt-get update
+              apt-get install -y aria2
+              ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
+              # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
+            else
+              echo "Model already exists at $DEST_DIR"
+            fi
+          volumeMounts:
+          - name: weight-volume
+            mountPath: {{ .Values.model.localMountPath }}
+        containers:
+        - name: vllm-leader
+          image: {{ .Values.vllm.image }}
+          imagePullPolicy: IfNotPresent
+          securityContext:
+            capabilities:
+              add: [ "IPC_LOCK" ]
+          env:
+          # - name: HUGGING_FACE_HUB_TOKEN
+          #   value: {{ .Values.vllm.huggingfaceToken }}
+          - name: GLOO_SOCKET_IFNAME
+            value: eth0
+          - name: NCCL_SOCKET_IFNAME
+            value: eth0
+          - name: NCCL_IB_DISABLE
+            value: "0"
+          - name: NCCL_DEBUG
+            value: INFO
+          - name: NCCL_IB_HCA
+            value: mlx5_0:1
+          - name: NCCL_IB_GID_INDEX
+            value: "0" # or "7", depending on your network configuration
+          - name: RAY_DEDUP_LOGS
+            value: "0"
+          command:
+          - sh
+          - -c
+          {{- if .Values.command }}
+          - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE); {{ .Values.command }}"
+          {{- else }}
+          - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
+            MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
+            python3 -m vllm.entrypoints.openai.api_server --port 8000 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
+          {{- end }}
+          resources:
+            limits:
+              nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
+              memory: {{ .Values.resources.memoryLimit }}
+              ephemeral-storage: 10Gi
+              rdma/rdma_shared_device_a: 10
+            requests:
+              
ephemeral-storage: 10Gi + cpu: {{ .Values.resources.cpuRequest }} + ports: + - containerPort: 8000 + name: http + readinessProbe: + tcpSocket: + #httpGet: + #path: /health + port: 8000 + initialDelaySeconds: 120 + periodSeconds: 20 + timeoutSeconds: 5 + volumeMounts: + - mountPath: /dev/shm + name: dshm + - name: weight-volume + mountPath: {{ .Values.model.localMountPath }} + volumes: + - name: dshm + emptyDir: + medium: Memory + sizeLimit: {{ .Values.resources.shmSize }} + - name: weight-volume + persistentVolumeClaim: + claimName: {{ .Release.Name }}-pvc-model + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 10 }} + {{- end }} + workerTemplate: + spec: + containers: + - name: vllm-worker + image: {{ .Values.vllm.image }} + imagePullPolicy: IfNotPresent + securityContext: + capabilities: + add: [ "IPC_LOCK" ] + command: + - sh + - -c + - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh worker --ray_address=$(LWS_LEADER_ADDRESS)" + resources: + limits: + nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}" + memory: {{ .Values.resources.memoryLimit }} + ephemeral-storage: 10Gi + rdma/rdma_shared_device_a: 10 + requests: + ephemeral-storage: 10Gi + cpu: {{ .Values.resources.cpuRequest }} + env: + # - name: HUGGING_FACE_HUB_TOKEN + # value: {{ .Values.vllm.huggingfaceToken }} + - name: GLOO_SOCKET_IFNAME + value: eth0 + - name: NCCL_SOCKET_IFNAME + value: eth0 + - name: NCCL_IB_DISABLE + value: "0" + - name: NCCL_DEBUG + value: INFO + - name: NCCL_IB_HCA + value: mlx5_0:1 + - name: NCCL_IB_GID_INDEX + value: "0" # 或 "7",根据你的网络配置而定 + - name: RAY_DEDUP_LOGS + value: "0" + volumeMounts: + - mountPath: /dev/shm + name: dshm + - name: weight-volume + mountPath: {{ .Values.model.localMountPath }} + volumes: + - name: dshm + emptyDir: + medium: Memory + 
sizeLimit: {{ .Values.resources.shmSize }} + - name: weight-volume + persistentVolumeClaim: + claimName: {{ .Release.Name }}-pvc-model + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 10 }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/vllm-serve/templates/nfs-pvc.yaml b/vllm-serve/templates/nfs-pvc.yaml new file mode 100644 index 0000000..1fe65a1 --- /dev/null +++ b/vllm-serve/templates/nfs-pvc.yaml @@ -0,0 +1,40 @@ +#apiVersion: v1 +#kind: PersistentVolume +#metadata: +# name: {{ .Release.Name }}-pv-model +#spec: +# storageClassName: weight # {{ .Values.nfs.storageClass | default "local-path" }} +# capacity: +# storage: {{ .Values.nfs.pvSize }} +# accessModes: +# - ReadWriteMany +# persistentVolumeReclaimPolicy: Retain +# # nfs: +# # path: {{ .Values.nfs.path }} +# # server: {{ .Values.nfs.server }} +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ .Release.Name }}-pvc-model + # annotations: +spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: {{ .Values.nfs.pvcSize }} + # volumeName: {{ .Release.Name }}-pv-model + storageClassName: weight + +#apiVersion: v1 +#kind: PersistentVolumeClaim +#metadata: +# name: {{ .Release.Name }}-pvc-model +#spec: +# accessModes: +# - ReadWriteMany +# resources: +# requests: +# storage: 20Gi +# storageClassName: nas-dataset diff --git a/vllm-serve/templates/services.yaml b/vllm-serve/templates/services.yaml new file mode 100644 index 0000000..b0daa30 --- /dev/null +++ b/vllm-serve/templates/services.yaml @@ -0,0 +1,35 @@ +#apiVersion: v1 +#kind: Service +#metadata: +# name: infer-leader-loadbalancer +#spec: +# type: LoadBalancer +# selector: +# leaderworkerset.sigs.k8s.io/name: infer +# role: leader +# ports: +# - protocol: TCP +# port: 8080 +# targetPort: 8080 +# 
+--- +apiVersion: v1 +kind: Service +metadata: + name: {{ .Release.Name }}-svc +spec: + type: {{ .Values.svc.type | default "LoadBalancer" }} + {{- if gt (int .Values.workerSize) 1 }} + selector: + leaderworkerset.sigs.k8s.io/name: {{ .Release.Name }} + role: leader + {{- else }} + selector: + app: {{ .Release.Name }} + {{- end }} + ports: + - protocol: TCP + port: {{ .Values.svc.port | default 80 }} + targetPort: http # {{ .Values.svc.targetPort | default 8080 }} + # nodePort: {{ .Values.svc.nodePort | default 30080 }} + diff --git a/vllm-serve/templates/single.yaml b/vllm-serve/templates/single.yaml new file mode 100644 index 0000000..15648b7 --- /dev/null +++ b/vllm-serve/templates/single.yaml @@ -0,0 +1,127 @@ +{{- if eq (int .Values.workerSize) 1 }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Release.Name }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + app: {{ .Release.Name }} + template: + metadata: + labels: + app: {{ .Release.Name }} + spec: + initContainers: + # 模型下载作为第一个 initContainer + - name: download-model + image: {{ .Values.model.download.image }} + imagePullPolicy: IfNotPresent + env: + - name: HF_ENDPOINT + value: https://hf-mirror.com + - name: HUGGING_FACE_HUB_TOKEN + value: {{ .Values.model.huggingfaceToken }} + command: + - sh + - -c + - | + MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}") + DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME" + # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}" + # 检查模型是否存在,不存在则下载 + echo "DEST_DIR= $DEST_DIR" + if [ ! 
-f "$DEST_DIR/config.json" ]; then
+            ls -l {{ .Values.model.localMountPath }}
+            echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
+            wget https://hf-mirror.com/hfd/hfd.sh
+            chmod a+x hfd.sh
+            apt-get update
+            apt-get install -y aria2
+            ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
+            # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
+          else
+            echo "Model already exists at $DEST_DIR"
+          fi
+        volumeMounts:
+        - name: weight-volume
+          mountPath: {{ .Values.model.localMountPath }}
+      containers:
+      - name: vllm-pod
+        image: {{ .Values.vllm.image }}
+        imagePullPolicy: IfNotPresent
+        env:
+        - name: HUGGING_FACE_HUB_TOKEN
+          value: {{ .Values.model.huggingfaceToken | quote }}
+        - name: RAY_DEDUP_LOGS
+          value: "0"
+        command:
+        - sh
+        - -c
+        {{- if .Values.command }}
+        - {{ .Values.command | quote }}
+        {{- else }}
+        - |
+          MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}');
+          MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
+          python3 -m vllm.entrypoints.openai.api_server \
+            --port 8000 \
+            --model $MODEL_PATH \
+            --tensor-parallel-size {{ .Values.resources.gpuLimit }} \
+            --pipeline_parallel_size {{ .Values.workerSize }} \
+            --trust_remote_code
+        {{- end }}
+        # - "
+        #   MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}');
+        #   MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
+        #   python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
+        resources:
+          limits:
+            nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
+            memory: {{ .Values.resources.memoryLimit }}
+            ephemeral-storage: 10Gi
+          requests:
+            ephemeral-storage: 10Gi
+            cpu: {{ .Values.resources.cpuRequest }}
+        ports:
+        - containerPort: 8000
+          name: http
+        readinessProbe:
+          #tcpSocket:
+          httpGet:
+            path: /health
+            port: 8000
+          initialDelaySeconds: 120
+          
periodSeconds: 20 + timeoutSeconds: 5 + volumeMounts: + - mountPath: /dev/shm + name: dshm + - name: weight-volume + mountPath: {{ .Values.model.localMountPath }} + volumes: + - name: dshm + emptyDir: + medium: Memory + sizeLimit: {{ .Values.resources.shmSize }} + - name: weight-volume + persistentVolumeClaim: + claimName: {{ .Release.Name }}-pvc-model + # - name: weight-volume + # nfs: + # path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight" + # server: "10.6.80.11" + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/vllm-serve/values.schema.json b/vllm-serve/values.schema.json new file mode 100644 index 0000000..c9d337c --- /dev/null +++ b/vllm-serve/values.schema.json @@ -0,0 +1,346 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "vllm-serve Helm Chart Values", + "description": "Schema for vllm-serve Helm chart values", + "type": "object", + "properties": { + "model": { + "type": "object", + "description": "模型配置", + "properties": { + "huggingfaceName": { + "type": "string", + "description": "HuggingFace 模型名称", + "default": "Qwen/Qwen2.5-0.5B-Instruct", + "enum": [ + "swiss-ai/Apertus-8B-2509", + "swiss-ai/Apertus-70B-Instruct-2509", + "BAAI/Aquila-7B", + "BAAI/AquilaChat-7B", + "arcee-ai/AFM-4.5B-Base", + "Snowflake/snowflake-arctic-base", + "Snowflake/snowflake-arctic-instruct", + "baichuan-inc/Baichuan2-13B-Chat", + "baichuan-inc/Baichuan-7B", + "inclusionAI/Ling-lite-1.5", + "inclusionAI/Ling-plus", + "inclusionAI/Ling-mini-2.0", + "ibm-ai-platform/Bamba-9B-fp8", + "ibm-ai-platform/Bamba-9B", + "bigscience/bloom", + "bigscience/bloomz", + "zai-org/chatglm2-6b", + "zai-org/chatglm3-6b", + "CohereLabs/c4ai-command-r-v01", + 
"CohereLabs/c4ai-command-r7b-12-2024", + "CohereLabs/c4ai-command-a-03-2025", + "CohereLabs/command-a-reasoning-08-2025", + "databricks/dbrx-base", + "databricks/dbrx-instruct", + "nvidia/Llama-3_3-Nemotron-Super-49B-v1", + "deepseek-ai/deepseek-llm-67b-base", + "deepseek-ai/deepseek-llm-7b-chat", + "deepseek-ai/DeepSeek-V2", + "deepseek-ai/DeepSeek-V2-Chat", + "deepseek-ai/DeepSeek-V3", + "deepseek-ai/DeepSeek-R1", + "deepseek-ai/DeepSeek-V3.1", + "rednote-hilab/dots.llm1.base", + "rednote-hilab/dots.llm1.inst", + "rednote-hilab/dots.ocr", + "baidu/ERNIE-4.5-0.3B-PT", + "baidu/ERNIE-4.5-21B-A3B-PT", + "baidu/ERNIE-4.5-300B-A47B-PT", + "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", + "LGAI-EXAONE/EXAONE-4.0-32B", + "mgleize/fairseq2-dummy-Llama-3.2-1B", + "tiiuae/falcon-7b", + "tiiuae/falcon-40b", + "tiiuae/falcon-rw-7b", + "tiiuae/falcon-mamba-7b", + "tiiuae/falcon-mamba-7b-instruct", + "tiiuae/Falcon-H1-34B-Base", + "tiiuae/Falcon-H1-34B-Instruct", + "allenai/FlexOlmo-7x7B-1T", + "allenai/FlexOlmo-7x7B-1T-RT", + "google/gemma-2b", + "google/gemma-1.1-2b-it", + "google/gemma-2-9b", + "google/gemma-2-27b", + "google/gemma-3-1b-it", + "google/gemma-3n-E2B-it", + "google/gemma-3n-E4B-it", + "zai-org/glm-4-9b-chat-hf", + "zai-org/GLM-4-32B-0414", + "zai-org/GLM-4.5", + "gpt2", + "gpt2-xl", + "bigcode/starcoder", + "bigcode/gpt_bigcode-santacoder", + "WizardLM/WizardCoder-15B-V1.0", + "EleutherAI/gpt-j-6b", + "nomic-ai/gpt4all-j", + "EleutherAI/gpt-neox-20b", + "EleutherAI/pythia-12b", + "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5", + "databricks/dolly-v2-12b", + "stabilityai/stablelm-tuned-alpha-7b", + "openai/gpt-oss-120b", + "openai/gpt-oss-20b", + "ibm-granite/granite-3.0-2b-base", + "ibm-granite/granite-3.1-8b-instruct", + "ibm/PowerLM-3b", + "ibm-granite/granite-3.0-1b-a400m-base", + "ibm-granite/granite-3.0-3b-a800m-instruct", + "ibm/PowerMoE-3b", + "ibm-granite/granite-4.0-tiny-preview", + "parasail-ai/GritLM-7B-vllm", + "hpcai-tech/grok-1", + 
"tencent/Hunyuan-7B-Instruct", + "tencent/Hunyuan-A13B-Instruct", + "tencent/Hunyuan-A13B-Pretrain", + "tencent/Hunyuan-A13B-Instruct-FP8", + "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B", + "internlm/internlm-7b", + "internlm/internlm-chat-7b", + "internlm/internlm2-7b", + "internlm/internlm2-chat-7b", + "internlm/internlm3-8b-instruct", + "inceptionai/jais-13b", + "inceptionai/jais-13b-chat", + "inceptionai/jais-30b-v3", + "inceptionai/jais-30b-chat-v3", + "ai21labs/AI21-Jamba-1.5-Large", + "ai21labs/AI21-Jamba-1.5-Mini", + "ai21labs/Jamba-v0.1", + "LiquidAI/LFM2-1.2B", + "LiquidAI/LFM2-700M", + "LiquidAI/LFM2-350M", + "LiquidAI/LFM2-8B-A1B-preview", + "meta-llama/Meta-Llama-3.1-405B-Instruct", + "meta-llama/Meta-Llama-3.1-70B", + "meta-llama/Meta-Llama-3-70B-Instruct", + "meta-llama/Llama-2-70b-hf", + "01-ai/Yi-34B", + "state-spaces/mamba-130m-hf", + "state-spaces/mamba-790m-hf", + "state-spaces/mamba-2.8b-hf", + "mistralai/Mamba-Codestral-7B-v0.1", + "XiaomiMiMo/MiMo-7B-RL", + "openbmb/MiniCPM-2B-sft-bf16", + "openbmb/MiniCPM-2B-dpo-bf16", + "openbmb/MiniCPM-S-1B-sft", + "openbmb/MiniCPM3-4B", + "MiniMaxAI/MiniMax-M2", + "mistralai/Mistral-7B-v0.1", + "mistralai/Mistral-7B-Instruct-v0.1", + "mistralai/Mixtral-8x7B-v0.1", + "mistralai/Mixtral-8x7B-Instruct-v0.1", + "mistral-community/Mixtral-8x22B-v0.1", + "mosaicml/mpt-7b", + "mosaicml/mpt-7b-storywriter", + "mosaicml/mpt-30b", + "nvidia/Minitron-8B-Base", + "mgoin/Nemotron-4-340B-Base-hf-FP8", + "nvidia/Nemotron-H-8B-Base-8K", + "nvidia/Nemotron-H-47B-Base-8K", + "nvidia/Nemotron-H-56B-Base-8K", + "allenai/OLMo-1B-hf", + "allenai/OLMo-7B-hf", + "allenai/OLMo-2-0425-1B", + "allenai/OLMoE-1B-7B-0924", + "allenai/OLMoE-1B-7B-0924-Instruct", + "facebook/opt-66b", + "facebook/opt-iml-max-30b", + "OrionStarAI/Orion-14B-Base", + "OrionStarAI/Orion-14B-Chat", + "microsoft/phi-1_5", + "microsoft/phi-2", + "microsoft/Phi-4-mini-instruct", + "microsoft/Phi-4", + "microsoft/Phi-3-mini-4k-instruct", + 
"microsoft/Phi-3-mini-128k-instruct", + "microsoft/Phi-3-medium-128k-instruct", + "microsoft/Phi-3.5-MoE-instruct", + "adept/persimmon-8b-base", + "adept/persimmon-8b-chat", + "pfnet/plamo-2-1b", + "pfnet/plamo-2-8b", + "Qwen/Qwen-7B", + "Qwen/Qwen-7B-Chat", + "Qwen/QwQ-32B-Preview", + "Qwen/Qwen2-7B-Instruct", + "Qwen/Qwen2-7B", + "Qwen/Qwen2.5-0.5B-Instruct", + "Qwen/Qwen1.5-MoE-A2.7B", + "Qwen/Qwen1.5-MoE-A2.7B-Chat", + "Qwen/Qwen3-8B", + "Qwen/Qwen3-30B-A3B", + "Qwen/Qwen3-Next-80B-A3B-Instruct", + "ByteDance-Seed/Seed-OSS-36B-Instruct", + "stabilityai/stablelm-3b-4e1t", + "stabilityai/stablelm-base-alpha-7b-v2", + "bigcode/starcoder2-3b", + "bigcode/starcoder2-7b", + "bigcode/starcoder2-15b", + "upstage/solar-pro-preview-instruct", + "Tele-AI/TeleChat2-3B", + "Tele-AI/TeleChat2-7B", + "Tele-AI/TeleChat2-35B", + "CofeAI/FLM-2-52B-Instruct-2407", + "CofeAI/Tele-FLM", + "xverse/XVERSE-7B-Chat", + "xverse/XVERSE-13B-Chat", + "xverse/XVERSE-65B-Chat", + "MiniMaxAI/MiniMax-M1-40k", + "MiniMaxAI/MiniMax-M1-80k", + "MiniMaxAI/MiniMax-Text-01", + "Zyphra/Zamba2-7B-instruct", + "Zyphra/Zamba2-2.7B-instruct", + "Zyphra/Zamba2-1.2B-instruct", + "meituan-longcat/LongCat-Flash-Chat", + "meituan-longcat/LongCat-Flash-Chat-FP8", + "rhymes-ai/Aria", + "CohereForAI/aya-vision-8b", + "CohereForAI/aya-vision-32b", + "Open-Bee/Bee-8B-RL", + "Open-Bee/Bee-8B-SFT", + "Salesforce/blip2-opt-2.7b", + "Salesforce/blip2-opt-6.7b", + "facebook/chameleon-7b", + "CohereLabs/command-a-vision-07-2025", + "deepseek-ai/deepseek-vl2-tiny", + "deepseek-ai/deepseek-vl2-small", + "deepseek-ai/deepseek-vl2", + "deepseek-ai/DeepSeek-OCR", + "baidu/ERNIE-4.5-VL-28B-A3B-PT", + "baidu/ERNIE-4.5-VL-424B-A47B-PT", + "adept/fuyu-8b", + "google/gemma-3-4b-it", + "google/gemma-3-27b-it", + "zai-org/glm-4v-9b", + "zai-org/cogagent-9b-20241220", + "zai-org/GLM-4.1V-9B-Thinking", + "zai-org/GLM-4.5V", + "ibm-granite/granite-speech-3.3-8b", + "h2oai/h2ovl-mississippi-800m", + "h2oai/h2ovl-mississippi-2b", + 
"HuggingFaceM4/Idefics3-8B-Llama3", + "internlm/Intern-S1", + "internlm/Intern-S1-mini", + "OpenGVLab/InternVL3_5-14B", + "OpenGVLab/InternVL3-9B", + "OpenGVLab/InternVideo2_5_Chat_8B", + "OpenGVLab/InternVL2_5-4B", + "OpenGVLab/Mono-InternVL-2B", + "OpenGVLab/InternVL2-4B", + "OpenGVLab/InternVL3-1B-hf", + "Kwai-Keye/Keye-VL-8B-Preview", + "Kwai-Keye/Keye-VL-1_5-8B", + "moonshotai/Kimi-VL-A3B-Instruct", + "moonshotai/Kimi-VL-A3B-Thinking", + "lightonai/LightOnOCR-1B", + "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1", + "llava-hf/llava-1.5-7b-hf", + "TIGER-Lab/Mantis-8B-siglip-llama3", + "mistral-community/pixtral-12b", + "llava-hf/llava-v1.6-mistral-7b-hf", + "llava-hf/llava-v1.6-vicuna-7b-hf", + "llava-hf/LLaVA-NeXT-Video-7B-hf", + "llava-hf/llava-onevision-qwen2-7b-ov-hf", + "llava-hf/llava-onevision-qwen2-0.5b-ov-hf", + "mispeech/midashenglm-7b", + "openbmb/MiniCPM-o-2_6", + "openbmb/MiniCPM-V-2", + "openbmb/MiniCPM-Llama3-V-2_5", + "openbmb/MiniCPM-V-2_6", + "openbmb/MiniCPM-V-4", + "openbmb/MiniCPM-V-4_5", + "MiniMaxAI/MiniMax-VL-01", + "mistralai/Mistral-Small-3.1-24B-Instruct-2503", + "allenai/Molmo-7B-D-0924", + "allenai/Molmo-7B-O-0924", + "nvidia/NVLM-D-72B", + "AIDC-AI/Ovis2-1B", + "AIDC-AI/Ovis1.6-Llama3.2-3B", + "AIDC-AI/Ovis2.5-9B", + "google/paligemma-3b-pt-224", + "google/paligemma-3b-mix-224", + "google/paligemma2-3b-ft-docci-448", + "microsoft/Phi-3-vision-128k-instruct", + "microsoft/Phi-3.5-vision-instruct", + "microsoft/Phi-4-multimodal-instruct", + "mistralai/Pixtral-12B-2409", + "Qwen/Qwen-VL", + "Qwen/Qwen-VL-Chat", + "Qwen/Qwen2-Audio-7B-Instruct", + "Qwen/QVQ-72B-Preview", + "Qwen/Qwen2-VL-7B-Instruct", + "Qwen/Qwen2-VL-72B-Instruct", + "Qwen/Qwen2.5-VL-3B-Instruct", + "Qwen/Qwen2.5-VL-72B-Instruct", + "Qwen/Qwen2.5-Omni-3B", + "Qwen/Qwen2.5-Omni-7B", + "Qwen/Qwen3-VL-4B-Instruct", + 
"Qwen/Qwen3-VL-30B-A3B-Instruct", + "Qwen/Qwen3-Omni-30B-A3B-Instruct", + "Qwen/Qwen3-Omni-30B-A3B-Thinking", + "YannQi/R-4B", + "Skywork/Skywork-R1V-38B", + "SmolVLM2-2.2B-Instruct", + "stepfun-ai/step3", + "omni-search/Tarsier-7b", + "omni-search/Tarsier-34b", + "omni-research/Tarsier2-Recap-7b", + "omni-research/Tarsier2-7b-0115" + ] + } + }, + "required": ["huggingfaceName"] + }, + "resources": { + "type": "object", + "description": "资源配置", + "properties": { + "gpuLimit": { + "type": "integer", + "description": "GPU 限制", + "default": 1, + "minimum": 1 + }, + "cpuRequest": { + "type": "integer", + "description": "CPU 请求", + "default": 12, + "minimum": 1 + }, + "memoryLimit": { + "type": "string", + "description": "内存限制", + "default": "16Gi", + "pattern": "^[0-9]+(\\.[0-9]+)?(Mi|Gi|Ti)$" + }, + "shmSize": { + "type": "string", + "description": "共享内存大小", + "default": "20Gi", + "pattern": "^[0-9]+(\\.[0-9]+)?(Mi|Gi|Ti)$" + } + } + }, + "workerSize": { + "type": "integer", + "description": "Worker 数量", + "default": 1, + "minimum": 1 + }, + "command": { + "type": "string", + "description": "自定义命令,模型路径路为 /Model/Weight/Qwen3-0.6B, LoRA 路径为 /Model/LoRA/Qwen3-0.6B (可选) \n e.g. vllm serve --model /Model/Weight/Qwen3-0.6B ", + "default": "" + } + } +} \ No newline at end of file diff --git a/vllm-serve/values.yaml b/vllm-serve/values.yaml new file mode 100644 index 0000000..2d5a6a0 --- /dev/null +++ b/vllm-serve/values.yaml @@ -0,0 +1,76 @@ +# Default values for vllm-app. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ +imagePullSecrets: [] +imagePullPolicy: IfNotPresent +# This is to override the chart name. 
+nameOverride: "" +fullnameOverride: "" + +# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/ +serviceAccount: + # Specifies whether a service account should be created + create: true + # Automatically mount a ServiceAccount's API credentials? + automount: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + + +# 模型配置 +model: + huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # 用户只需输入这个 + localMountPath: "/Model" # PVC 固定挂载路径 + huggingfaceToken: "" + download: # 启用自动下载 + image: "docker.io/vllm/vllm-openai:latest" # 包含 huggingface-cli 的镜像 + +# 功能选择 + +resources: + gpuLimit: 1 + cpuRequest: 12 + memoryLimit: "16Gi" + shmSize: "20Gi" + +svc: + type: LoadBalancer + port: 80 + targetPort: 8000 + # nodePort: 30080 +# vLLM 应用配置 +vllm: + image: "docker.io/vllm/vllm-openai:latest" + +command: "" + +llama: + image: "docker.io/library/one-click:v1" + +# lmdeploy 应用配置 +lmdeploy: + image: "docker.io/openmmlab/lmdeploy:latest-cu12" + + +# NFS PV/PVC 配置 +nfs: + server: "10.6.80.11" + path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight" + # storageClass: "local-path" + pvSize: "500Gi" + pvcSize: "50Gi" + +# LeaderWorkerSet 配置 +replicaCount: 1 +workerSize: 1 + +nodeSelector: {} + +tolerations: [] + +affinity: {}