From 89bc94a6a96d1fe478a3179bbde822d27f2401bf Mon Sep 17 00:00:00 2001 From: Ivan087 Date: Fri, 5 Dec 2025 17:09:01 +0800 Subject: [PATCH] feat: support Hami on k3s and k8s --- vllm-serve/Chart.yaml | 2 +- vllm-serve/templates/lws.yaml | 12 ++++++++++++ vllm-serve/templates/single.yaml | 6 ++++++ vllm-serve/values.schema.json | 6 ++++++ vllm-serve/values.yaml | 1 + 5 files changed, 26 insertions(+), 1 deletion(-) diff --git a/vllm-serve/Chart.yaml b/vllm-serve/Chart.yaml index 090d5e6..87049a8 100644 --- a/vllm-serve/Chart.yaml +++ b/vllm-serve/Chart.yaml @@ -5,4 +5,4 @@ appVersion: 1.16.0 description: A Helm chart for deploying vLLM with NFS storage name: vllm-serve type: application -version: 0.2.1 +version: 0.3.0 diff --git a/vllm-serve/templates/lws.yaml b/vllm-serve/templates/lws.yaml index 1f2a087..e8e78c0 100644 --- a/vllm-serve/templates/lws.yaml +++ b/vllm-serve/templates/lws.yaml @@ -122,6 +122,9 @@ spec: nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}" memory: {{ .Values.resources.memoryLimit }} ephemeral-storage: 10Gi + {{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }} + nvidia.com/gpumem: "{{ .Values.resources.gpuMem }}" + {{- end }} {{- if .Values.rdma.enabled }} {{ .Values.rdma.resourceName }}: {{ .Values.rdma.resourceCount | default 1 }} {{- end }} @@ -129,6 +132,9 @@ spec: ephemeral-storage: 10Gi cpu: {{ .Values.resources.cpuRequest }} nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}" + {{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }} + nvidia.com/gpumem: "{{ .Values.resources.gpuMem }}" + {{- end }} {{- if .Values.rdma.enabled }} {{ .Values.rdma.resourceName }}: {{ .Values.rdma.resourceCount | default 1 }} {{- end }} @@ -199,6 +205,9 @@ spec: 
nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}" memory: {{ .Values.resources.memoryLimit }} ephemeral-storage: 10Gi + {{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }} + nvidia.com/gpumem: "{{ .Values.resources.gpuMem }}" + {{- end }} {{- if .Values.rdma.enabled }} {{ .Values.rdma.resourceName }}: {{ .Values.rdma.resourceCount | default 1 }} {{- end }} @@ -206,6 +215,9 @@ spec: ephemeral-storage: 10Gi cpu: {{ .Values.resources.cpuRequest }} nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}" + {{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }} + nvidia.com/gpumem: "{{ .Values.resources.gpuMem }}" + {{- end }} {{- if .Values.rdma.enabled }} {{ .Values.rdma.resourceName }}: {{ .Values.rdma.resourceCount | default 1 }} {{- end }} diff --git a/vllm-serve/templates/single.yaml b/vllm-serve/templates/single.yaml index 5fc40a2..ef6c918 100644 --- a/vllm-serve/templates/single.yaml +++ b/vllm-serve/templates/single.yaml @@ -92,12 +92,18 @@ spec: resources: limits: nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}" + {{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }} + nvidia.com/gpumem: "{{ .Values.resources.gpuMem }}" + {{- end }} memory: {{ .Values.resources.memoryLimit }} ephemeral-storage: 10Gi cpu: {{ .Values.resources.cpuRequest }} requests: ephemeral-storage: 10Gi cpu: {{ .Values.resources.cpuRequest }} + {{- if and .Values.resources.gpuMem (gt (int .Values.resources.gpuMem) 0) }} + nvidia.com/gpumem: "{{ .Values.resources.gpuMem }}" + {{- end }} ports: - containerPort: 8000 name: http diff --git a/vllm-serve/values.schema.json b/vllm-serve/values.schema.json index 8e10cd2..91043c6 100644 --- a/vllm-serve/values.schema.json +++ b/vllm-serve/values.schema.json @@ -593,6 +593,12 @@ "default": 1, "minimum": 1 }, + "gpuMem": { + "type": "integer", + "description": "GPU 显存限制,单位MB, 0表示独占卡", + "default": 0, + "minimum": 0 + }, "cpuRequest": { "type": "integer", "description": "CPU 请求", diff --git 
a/vllm-serve/values.yaml b/vllm-serve/values.yaml index 266a1fe..8b59c14 100644 --- a/vllm-serve/values.yaml +++ b/vllm-serve/values.yaml @@ -32,6 +32,7 @@ model: resources: gpuLimit: 1 + gpuMem: 0 cpuRequest: 12 memoryLimit: "16Gi" shmSize: "20Gi"