feat: support Hami on k3s and k8s
All checks were successful
Publish Helm Charts / helm-publish (push) Successful in 7s
@@ -5,4 +5,4 @@ appVersion: 1.16.0
 description: A Helm chart for deploying vLLM with NFS storage
 name: vllm-serve
 type: application
-version: 0.2.1
+version: 0.3.0

@@ -16,7 +16,7 @@ spec:
   initContainers:
     # the model download runs as the first initContainer
     - name: download-model
-      image: {{ .Values.model.download.image }}
+      image: alpine:latest
       imagePullPolicy: {{ .Values.imagePullPolicy | default "IfNotPresent" }}
       env:
         - name: HF_ENDPOINT
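
The download step's pull behaviour stays configurable through the chart-level imagePullPolicy value shown above. As a small, hypothetical override (not part of this commit), forcing a fresh pull of the download image would look like:

# custom-values.yaml (illustrative only)
imagePullPolicy: Always   # chart default is IfNotPresent, per the template line above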
@@ -122,6 +122,9 @@ spec:
 nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
 memory: {{ .Values.resources.memoryLimit }}
 ephemeral-storage: 10Gi
+{{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
+nvidia.com/gpumem: {{ .Values.resources.gpuMem }}
+{{- end }}
 {{- if .Values.rdma.enabled }}
 {{ .Values.rdma.resourceName }}: {{ .Values.rdma.resourceCount | default 1 }}
 {{- end }}

@@ -129,6 +132,9 @@ spec:
 ephemeral-storage: 10Gi
 cpu: {{ .Values.resources.cpuRequest }}
 nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
+{{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
+nvidia.com/gpumem: {{ .Values.resources.gpuMem }}
+{{- end }}
 {{- if .Values.rdma.enabled }}
 {{ .Values.rdma.resourceName }}: {{ .Values.rdma.resourceCount | default 1 }}
 {{- end }}
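
For a concrete picture of what the new lines produce, assume a hypothetical override of gpuMem: 8000 (MB) on top of the chart defaults (gpuLimit: 1, cpuRequest: 12, memoryLimit: 16Gi) with RDMA disabled; the limits/requests blocks above would then render roughly as:

# illustrative rendering only; gpuMem 8000 is an assumed override, not a chart default
resources:
  limits:
    nvidia.com/gpu: "1"
    memory: 16Gi
    ephemeral-storage: 10Gi
    nvidia.com/gpumem: 8000
  requests:
    ephemeral-storage: 10Gi
    cpu: 12
    nvidia.com/gpu: "1"
    nvidia.com/gpumem: 8000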
@@ -199,6 +205,9 @@ spec:
 nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
 memory: {{ .Values.resources.memoryLimit }}
 ephemeral-storage: 10Gi
+{{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
+nvidia.com/gpumem: {{ .Values.resources.gpuMem }}
+{{- end }}
 {{- if .Values.rdma.enabled }}
 {{ .Values.rdma.resourceName }}: {{ .Values.rdma.resourceCount | default 1 }}
 {{- end }}

@@ -206,6 +215,9 @@ spec:
 ephemeral-storage: 10Gi
 cpu: {{ .Values.resources.cpuRequest }}
 nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
+{{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
+nvidia.com/gpumem: {{ .Values.resources.gpuMem }}
+{{- end }}
 {{- if .Values.rdma.enabled }}
 {{ .Values.rdma.resourceName }}: {{ .Values.rdma.resourceCount | default 1 }}
 {{- end }}

@@ -92,12 +92,18 @@ spec:
 resources:
   limits:
     nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
+    {{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
+    nvidia.com/gpumem: {{ .Values.resources.gpuMem }}
+    {{- end }}
     memory: {{ .Values.resources.memoryLimit }}
     ephemeral-storage: 10Gi
     cpu: {{ .Values.resources.cpuRequest }}
   requests:
     ephemeral-storage: 10Gi
     cpu: {{ .Values.resources.cpuRequest }}
+    {{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }}
+    nvidia.com/gpumem: {{ .Values.resources.gpuMem }}
+    {{- end }}
 ports:
   - containerPort: 8000
     name: http
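
One detail worth noting in all of these blocks: the gt guard means nothing changes for existing users. When resources.gpuMem keeps its default of 0, the nvidia.com/gpumem entry is omitted and the container claims whole GPUs exactly as before; only a positive value (in MB, per the schema below) asks HAMi for a memory slice. A minimal sketch of the two rendered outcomes, with assumed values:

# gpuMem: 0 (default) - no gpumem entry, whole-card scheduling
limits:
  nvidia.com/gpu: "1"
  memory: 16Gi

# gpuMem: 8000 (assumed override) - memory-sliced scheduling via HAMi
limits:
  nvidia.com/gpu: "1"
  nvidia.com/gpumem: 8000
  memory: 16Gi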
@@ -593,6 +593,12 @@
   "default": 1,
   "minimum": 1
 },
+"gpuMem": {
+  "type": "integer",
+  "description": "GPU memory limit in MB; 0 means the card is used exclusively",
+  "default": 0,
+  "minimum": 0
+},
 "cpuRequest": {
   "type": "integer",
   "description": "CPU request",
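
Because this entry lives in values.schema.json, Helm validates user-supplied values against it during install, upgrade, lint and template. A rough sketch of what the new field accepts (values are illustrative):

# passes validation: integer >= 0 (0 keeps the whole-card default)
resources:
  gpuMem: 8000
# fails validation before anything is rendered: below the declared minimum
# resources:
#   gpuMem: -1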
@@ -32,6 +32,7 @@ model:
 
 resources:
   gpuLimit: 1
+  gpuMem: 0
   cpuRequest: 12
   memoryLimit: "16Gi"
   shmSize: "20Gi"
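
With gpuMem left at 0, upgraded releases keep today's whole-GPU behaviour. To request a HAMi memory slice instead, a user would override the new value, for example (the 8000 MB figure is an assumed illustration, not a recommendation, and it presumes the HAMi device plugin is installed on the cluster):

# custom-values.yaml (hypothetical override)
resources:
  gpuLimit: 1    # still one GPU, but now shareable
  gpuMem: 8000   # memory slice in MB; 0 keeps the card exclusive

Passed via the usual helm install/upgrade -f custom-values.yaml, this makes the guarded template lines above emit nvidia.com/gpumem: 8000 in both limits and requests.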