feat: support HAMi on k3s and k8s
All checks were successful
Publish Helm Charts / helm-publish (push) Successful in 7s

This commit is contained in:
Ivan087
2025-12-05 17:09:01 +08:00
parent 97c5d559e3
commit 89bc94a6a9
5 changed files with 27 additions and 2 deletions

View File

@@ -5,4 +5,4 @@ appVersion: 1.16.0
description: A Helm chart for deploying vLLM with NFS storage
name: vllm-serve
type: application
version: 0.2.1
version: 0.3.0

View File

@@ -16,7 +16,7 @@ spec:
initContainers:
# 模型下载作为第一个 initContainer
- name: download-model
image: {{ .Values.model.download.image }}
image: alpine:latest
imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }}
env:
- name: HF_ENDPOINT
@@ -122,6 +122,9 @@ spec:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
{{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
nvidia.com/gpumem: {{ .Values.resources.gpuMem }}
{{- end }}
{{- if .Values.rdma.enabled }}
{{ .Values.rdma.resourceName }}: {{ .Values.rdma.resourceCount | default 1 }}
{{- end }}
@@ -129,6 +132,9 @@ spec:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
{{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
nvidia.com/gpumem: {{ .Values.resources.gpuMem }}
{{- end }}
{{- if .Values.rdma.enabled }}
{{ .Values.rdma.resourceName }}: {{ .Values.rdma.resourceCount | default 1 }}
{{- end }}
@@ -199,6 +205,9 @@ spec:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
{{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
nvidia.com/gpumem: {{ .Values.resources.gpuMem }}
{{- end }}
{{- if .Values.rdma.enabled }}
{{ .Values.rdma.resourceName }}: {{ .Values.rdma.resourceCount | default 1 }}
{{- end }}
@@ -206,6 +215,9 @@ spec:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
{{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
nvidia.com/gpumem: {{ .Values.resources.gpuMem }}
{{- end }}
{{- if .Values.rdma.enabled }}
{{ .Values.rdma.resourceName }}: {{ .Values.rdma.resourceCount | default 1 }}
{{- end }}

View File

@@ -92,12 +92,18 @@ spec:
resources:
limits:
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
{{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
nvidia.com/gpumem: {{ .Values.resources.gpuMem }}
{{- end }}
memory: {{ .Values.resources.memoryLimit }}
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
requests:
ephemeral-storage: 10Gi
cpu: {{ .Values.resources.cpuRequest }}
{{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
nvidia.com/gpumem: {{ .Values.resources.gpuMem }}
{{- end }}
ports:
- containerPort: 8000
name: http

View File

@@ -593,6 +593,12 @@
"default": 1,
"minimum": 1
},
"gpuMem": {
"type": "integer",
"description": "GPU 显存限制单位MB, 0表示独占卡",
"default": 0,
"minimum": 0
},
"cpuRequest": {
"type": "integer",
"description": "CPU 请求",

View File

@@ -32,6 +32,7 @@ model:
resources:
gpuLimit: 1
gpuMem: 0
cpuRequest: 12
memoryLimit: "16Gi"
shmSize: "20Gi"