diff --git a/vllm/vllm-serve/templates/NOTES.txt b/vllm/vllm-serve/templates/NOTES.txt
index 820b7e0..9b881b5 100644
--- a/vllm/vllm-serve/templates/NOTES.txt
+++ b/vllm/vllm-serve/templates/NOTES.txt
@@ -3,12 +3,12 @@
   export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "vllm-serve.fullname" . }})
   export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
   echo http://$NODE_IP:$NODE_PORT
-{{- else if contains "LoadBalancer" .Values.service.type }}
+{{- else if contains "LoadBalancer" .Values.svc.type }}
      NOTE: It may take a few minutes for the LoadBalancer IP to be available.
            You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "vllm-serve.fullname" . }}'
   export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "vllm-serve.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
-  echo http://$SERVICE_IP:{{ .Values.service.port }}
-{{- else if contains "ClusterIP" .Values.service.type }}
+  echo http://$SERVICE_IP:{{ .Values.svc.port }}
+{{- else if contains "ClusterIP" .Values.svc.type }}
   export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "vllm-serve.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
   export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
   echo "Visit http://127.0.0.1:8080 to use your application"
diff --git a/vllm/vllm-serve/templates/lws.yaml b/vllm/vllm-serve/templates/lws.yaml
index 02fb247..b430e03 100644
--- a/vllm/vllm-serve/templates/lws.yaml
+++ b/vllm/vllm-serve/templates/lws.yaml
@@ -28,7 +28,7 @@ spec:
           - -c
           - |
             MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
-            DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
+            DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME"
             # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
             # Check whether the model already exists; download it if not
             echo "DEST_DIR= $DEST_DIR"
@@ -37,6 +37,7 @@ spec:
             echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
             wget https://hf-mirror.com/hfd/hfd.sh
             chmod a+x hfd.sh
+            apt update && apt upgrade -y
             apt install aria2 -y
             ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
             # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
@@ -74,7 +75,7 @@ spec:
             - sh
             - -c
             - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
-              MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
+              MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
               python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
           resources:
             limits:
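
Note on the leader command above: resources.gpuLimit feeds --tensor-parallel-size (GPUs per pod) and workerSize feeds --pipeline_parallel_size (pods per LWS group), so a deployment occupies gpuLimit x workerSize GPUs in total. A minimal sizing sketch (value names are from this chart's values.yaml; the 16-GPU total is simple arithmetic, not taken from the diff):

  # 8 GPUs per pod x 2 pods = 16 GPUs in total
  helm upgrade --install vllm ./vllm/vllm-serve \
    --set resources.gpuLimit=8 \
    --set workerSize=2
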
diff --git a/vllm/vllm-serve/templates/nfs-pvc.yaml b/vllm/vllm-serve/templates/nfs-pvc.yaml
index 82ab78c..1fe65a1 100644
--- a/vllm/vllm-serve/templates/nfs-pvc.yaml
+++ b/vllm/vllm-serve/templates/nfs-pvc.yaml
@@ -1,28 +1,40 @@
-apiVersion: v1
-kind: PersistentVolume
-metadata:
-  name: {{ .Release.Name }}-pv-model
-spec:
-  storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
-  capacity:
-    storage: {{ .Values.nfs.pvSize }}
-  accessModes:
-    - ReadWriteMany
-  persistentVolumeReclaimPolicy: Retain
-  nfs:
-    path: {{ .Values.nfs.path }}
-    server: {{ .Values.nfs.server }}
+#apiVersion: v1
+#kind: PersistentVolume
+#metadata:
+#  name: {{ .Release.Name }}-pv-model
+#spec:
+#  storageClassName: weight # {{ .Values.nfs.storageClass | default "local-path" }}
+#  capacity:
+#    storage: {{ .Values.nfs.pvSize }}
+#  accessModes:
+#    - ReadWriteMany
+#  persistentVolumeReclaimPolicy: Retain
+#  # nfs:
+#  #   path: {{ .Values.nfs.path }}
+#  #   server: {{ .Values.nfs.server }}
 ---
-
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
   name: {{ .Release.Name }}-pvc-model
-  annotations:
+  # annotations:
 spec:
   accessModes:
     - ReadWriteMany
   resources:
     requests:
       storage: {{ .Values.nfs.pvcSize }}
-  volumeName: {{ .Release.Name }}-pv-model
+  # volumeName: {{ .Release.Name }}-pv-model
+  storageClassName: weight
+
+#apiVersion: v1
+#kind: PersistentVolumeClaim
+#metadata:
+#  name: {{ .Release.Name }}-pvc-model
+#spec:
+#  accessModes:
+#    - ReadWriteMany
+#  resources:
+#    requests:
+#      storage: 20Gi
+#  storageClassName: nas-dataset
diff --git a/vllm/vllm-serve/templates/services.yaml b/vllm/vllm-serve/templates/services.yaml
index 54f2623..b348ff6 100644
--- a/vllm/vllm-serve/templates/services.yaml
+++ b/vllm/vllm-serve/templates/services.yaml
@@ -18,7 +18,7 @@ kind: Service
 metadata:
   name: {{ .Release.Name }}-svc
 spec:
-  type: {{ .Values.svc.type | default "NodePort" }}
+  type: {{ .Values.svc.type | default "LoadBalancer" }}
 {{- if gt (int .Values.workerSize) 1 }}
   selector:
     leaderworkerset.sigs.k8s.io/name: {{ .Release.Name }}
@@ -29,7 +29,7 @@ spec:
 {{- end }}
   ports:
     - protocol: TCP
-      port: {{ .Values.svc.port | default 8080 }}
-      targetPort: {{ .Values.svc.port | default 8080 }}
-      nodePort: {{ .Values.svc.nodePort | default 30080 }}
+      port: {{ .Values.svc.port | default 80 }}
+      targetPort: {{ .Values.svc.targetPort | default 8080 }}
+      # nodePort: {{ .Values.svc.nodePort | default 30080 }}
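
Note on the Service change above: the chart now fronts container port 8080 with service port 80 by default. A quick smoke test that needs no LoadBalancer IP (assumes a release named "vllm", so the Service is vllm-svc per the template above; /v1/models is vLLM's OpenAI-compatible model-listing endpoint):

  kubectl port-forward svc/vllm-svc 8080:80
  curl http://127.0.0.1:8080/v1/models
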
diff --git a/vllm/vllm-serve/templates/single.yaml b/vllm/vllm-serve/templates/single.yaml
index bcb13bc..ab5e44c 100644
--- a/vllm/vllm-serve/templates/single.yaml
+++ b/vllm/vllm-serve/templates/single.yaml
@@ -28,7 +28,7 @@ spec:
         - -c
         - |
           MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
-          DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
+          DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME"
           # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
           # Check whether the model already exists; download it if not
           echo "DEST_DIR= $DEST_DIR"
@@ -37,6 +37,7 @@ spec:
           echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
           wget https://hf-mirror.com/hfd/hfd.sh
           chmod a+x hfd.sh
+          apt update && apt upgrade -y
           apt install aria2 -y
           ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
           # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
@@ -58,9 +59,25 @@ spec:
         command:
           - sh
           - -c
-          - "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
-            echo 'Using single node ------------------------------------------';
-            python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
+          #args:
+          # {{- if .Values.command }}
+          # - {{ .Values.command | quote }}
+          # {{- else }}
+          # - |
+          #   MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}');
+          #   MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
+          #   # Note: the command block is no longer wrapped in quotes when variables are referenced here
+          #   python3 -m vllm.entrypoints.openai.api_server \
+          #     --port 8080 \
+          #     --model $MODEL_PATH \
+          #     --tensor-parallel-size {{ .Values.resources.gpuLimit }} \
+          #     --pipeline_parallel_size {{ .Values.workerSize }} \
+          #     --trust_remote_code
+          # {{- end }}
+          - "
+            MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}');
+            MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
+            python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
         resources:
           limits:
            nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
@@ -92,7 +109,11 @@ spec:
            sizeLimit: {{ .Values.resources.shmSize }}
         - name: weight-volume
           persistentVolumeClaim:
-            claimName: {{ .Release.Name }}-pvc-model
+            claimName: {{ .Release.Name }}-pvc-model
+        # - name: weight-volume
+        #   nfs:
+        #     path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
+        #     server: "10.6.80.11"
 {{- with .Values.nodeSelector }}
       nodeSelector:
         {{- toYaml . | nindent 8 }}
diff --git a/vllm/vllm-serve/values.schema.json b/vllm/vllm-serve/values.schema.json
new file mode 100644
index 0000000..c6f78de
--- /dev/null
+++ b/vllm/vllm-serve/values.schema.json
@@ -0,0 +1,341 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "vllm-serve Helm Chart Values",
+  "description": "Schema for vllm-serve Helm chart values",
+  "type": "object",
+  "properties": {
+    "model": {
+      "type": "object",
+      "description": "Model configuration",
+      "properties": {
+        "huggingfaceName": {
+          "type": "string",
+          "description": "HuggingFace model name",
+          "default": "Qwen/Qwen2.5-0.5B-Instruct",
+          "enum": [
+            "swiss-ai/Apertus-8B-2509",
+            "swiss-ai/Apertus-70B-Instruct-2509",
+            "BAAI/Aquila-7B",
+            "BAAI/AquilaChat-7B",
+            "arcee-ai/AFM-4.5B-Base",
+            "Snowflake/snowflake-arctic-base",
+            "Snowflake/snowflake-arctic-instruct",
+            "baichuan-inc/Baichuan2-13B-Chat",
+            "baichuan-inc/Baichuan-7B",
+            "inclusionAI/Ling-lite-1.5",
+            "inclusionAI/Ling-plus",
+            "inclusionAI/Ling-mini-2.0",
+            "ibm-ai-platform/Bamba-9B-fp8",
+            "ibm-ai-platform/Bamba-9B",
+            "bigscience/bloom",
+            "bigscience/bloomz",
+            "zai-org/chatglm2-6b",
+            "zai-org/chatglm3-6b",
+            "CohereLabs/c4ai-command-r-v01",
+            "CohereLabs/c4ai-command-r7b-12-2024",
+            "CohereLabs/c4ai-command-a-03-2025",
+            "CohereLabs/command-a-reasoning-08-2025",
+            "databricks/dbrx-base",
+            "databricks/dbrx-instruct",
+            "nvidia/Llama-3_3-Nemotron-Super-49B-v1",
+            "deepseek-ai/deepseek-llm-67b-base",
+            "deepseek-ai/deepseek-llm-7b-chat",
+            "deepseek-ai/DeepSeek-V2",
+            "deepseek-ai/DeepSeek-V2-Chat",
+            "deepseek-ai/DeepSeek-V3",
+            "deepseek-ai/DeepSeek-R1",
+            "deepseek-ai/DeepSeek-V3.1",
+            "rednote-hilab/dots.llm1.base",
+            "rednote-hilab/dots.llm1.inst",
+            "rednote-hilab/dots.ocr",
+            "baidu/ERNIE-4.5-0.3B-PT",
+            "baidu/ERNIE-4.5-21B-A3B-PT",
+            "baidu/ERNIE-4.5-300B-A47B-PT",
+            "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",
+            "LGAI-EXAONE/EXAONE-4.0-32B",
+            "mgleize/fairseq2-dummy-Llama-3.2-1B",
+            "tiiuae/falcon-7b",
+            "tiiuae/falcon-40b",
+            "tiiuae/falcon-rw-7b",
+            "tiiuae/falcon-mamba-7b",
+            "tiiuae/falcon-mamba-7b-instruct",
+            "tiiuae/Falcon-H1-34B-Base",
+            "tiiuae/Falcon-H1-34B-Instruct",
+            "allenai/FlexOlmo-7x7B-1T",
+            "allenai/FlexOlmo-7x7B-1T-RT",
+            "google/gemma-2b",
+            "google/gemma-1.1-2b-it",
"google/gemma-2-9b", + "google/gemma-2-27b", + "google/gemma-3-1b-it", + "google/gemma-3n-E2B-it", + "google/gemma-3n-E4B-it", + "zai-org/glm-4-9b-chat-hf", + "zai-org/GLM-4-32B-0414", + "zai-org/GLM-4.5", + "gpt2", + "gpt2-xl", + "bigcode/starcoder", + "bigcode/gpt_bigcode-santacoder", + "WizardLM/WizardCoder-15B-V1.0", + "EleutherAI/gpt-j-6b", + "nomic-ai/gpt4all-j", + "EleutherAI/gpt-neox-20b", + "EleutherAI/pythia-12b", + "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5", + "databricks/dolly-v2-12b", + "stabilityai/stablelm-tuned-alpha-7b", + "openai/gpt-oss-120b", + "openai/gpt-oss-20b", + "ibm-granite/granite-3.0-2b-base", + "ibm-granite/granite-3.1-8b-instruct", + "ibm/PowerLM-3b", + "ibm-granite/granite-3.0-1b-a400m-base", + "ibm-granite/granite-3.0-3b-a800m-instruct", + "ibm/PowerMoE-3b", + "ibm-granite/granite-4.0-tiny-preview", + "parasail-ai/GritLM-7B-vllm", + "hpcai-tech/grok-1", + "tencent/Hunyuan-7B-Instruct", + "tencent/Hunyuan-A13B-Instruct", + "tencent/Hunyuan-A13B-Pretrain", + "tencent/Hunyuan-A13B-Instruct-FP8", + "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B", + "internlm/internlm-7b", + "internlm/internlm-chat-7b", + "internlm/internlm2-7b", + "internlm/internlm2-chat-7b", + "internlm/internlm3-8b-instruct", + "inceptionai/jais-13b", + "inceptionai/jais-13b-chat", + "inceptionai/jais-30b-v3", + "inceptionai/jais-30b-chat-v3", + "ai21labs/AI21-Jamba-1.5-Large", + "ai21labs/AI21-Jamba-1.5-Mini", + "ai21labs/Jamba-v0.1", + "LiquidAI/LFM2-1.2B", + "LiquidAI/LFM2-700M", + "LiquidAI/LFM2-350M", + "LiquidAI/LFM2-8B-A1B-preview", + "meta-llama/Meta-Llama-3.1-405B-Instruct", + "meta-llama/Meta-Llama-3.1-70B", + "meta-llama/Meta-Llama-3-70B-Instruct", + "meta-llama/Llama-2-70b-hf", + "01-ai/Yi-34B", + "state-spaces/mamba-130m-hf", + "state-spaces/mamba-790m-hf", + "state-spaces/mamba-2.8b-hf", + "mistralai/Mamba-Codestral-7B-v0.1", + "XiaomiMiMo/MiMo-7B-RL", + "openbmb/MiniCPM-2B-sft-bf16", + "openbmb/MiniCPM-2B-dpo-bf16", + "openbmb/MiniCPM-S-1B-sft", + "openbmb/MiniCPM3-4B", + "MiniMaxAI/MiniMax-M2", + "mistralai/Mistral-7B-v0.1", + "mistralai/Mistral-7B-Instruct-v0.1", + "mistralai/Mixtral-8x7B-v0.1", + "mistralai/Mixtral-8x7B-Instruct-v0.1", + "mistral-community/Mixtral-8x22B-v0.1", + "mosaicml/mpt-7b", + "mosaicml/mpt-7b-storywriter", + "mosaicml/mpt-30b", + "nvidia/Minitron-8B-Base", + "mgoin/Nemotron-4-340B-Base-hf-FP8", + "nvidia/Nemotron-H-8B-Base-8K", + "nvidia/Nemotron-H-47B-Base-8K", + "nvidia/Nemotron-H-56B-Base-8K", + "allenai/OLMo-1B-hf", + "allenai/OLMo-7B-hf", + "allenai/OLMo-2-0425-1B", + "allenai/OLMoE-1B-7B-0924", + "allenai/OLMoE-1B-7B-0924-Instruct", + "facebook/opt-66b", + "facebook/opt-iml-max-30b", + "OrionStarAI/Orion-14B-Base", + "OrionStarAI/Orion-14B-Chat", + "microsoft/phi-1_5", + "microsoft/phi-2", + "microsoft/Phi-4-mini-instruct", + "microsoft/Phi-4", + "microsoft/Phi-3-mini-4k-instruct", + "microsoft/Phi-3-mini-128k-instruct", + "microsoft/Phi-3-medium-128k-instruct", + "microsoft/Phi-3.5-MoE-instruct", + "adept/persimmon-8b-base", + "adept/persimmon-8b-chat", + "pfnet/plamo-2-1b", + "pfnet/plamo-2-8b", + "Qwen/Qwen-7B", + "Qwen/Qwen-7B-Chat", + "Qwen/QwQ-32B-Preview", + "Qwen/Qwen2-7B-Instruct", + "Qwen/Qwen2-7B", + "Qwen/Qwen2.5-0.5B-Instruct", + "Qwen/Qwen1.5-MoE-A2.7B", + "Qwen/Qwen1.5-MoE-A2.7B-Chat", + "Qwen/Qwen3-8B", + "Qwen/Qwen3-30B-A3B", + "Qwen/Qwen3-Next-80B-A3B-Instruct", + "ByteDance-Seed/Seed-OSS-36B-Instruct", + "stabilityai/stablelm-3b-4e1t", + "stabilityai/stablelm-base-alpha-7b-v2", + "bigcode/starcoder2-3b", + 
"bigcode/starcoder2-7b", + "bigcode/starcoder2-15b", + "upstage/solar-pro-preview-instruct", + "Tele-AI/TeleChat2-3B", + "Tele-AI/TeleChat2-7B", + "Tele-AI/TeleChat2-35B", + "CofeAI/FLM-2-52B-Instruct-2407", + "CofeAI/Tele-FLM", + "xverse/XVERSE-7B-Chat", + "xverse/XVERSE-13B-Chat", + "xverse/XVERSE-65B-Chat", + "MiniMaxAI/MiniMax-M1-40k", + "MiniMaxAI/MiniMax-M1-80k", + "MiniMaxAI/MiniMax-Text-01", + "Zyphra/Zamba2-7B-instruct", + "Zyphra/Zamba2-2.7B-instruct", + "Zyphra/Zamba2-1.2B-instruct", + "meituan-longcat/LongCat-Flash-Chat", + "meituan-longcat/LongCat-Flash-Chat-FP8", + "rhymes-ai/Aria", + "CohereForAI/aya-vision-8b", + "CohereForAI/aya-vision-32b", + "Open-Bee/Bee-8B-RL", + "Open-Bee/Bee-8B-SFT", + "Salesforce/blip2-opt-2.7b", + "Salesforce/blip2-opt-6.7b", + "facebook/chameleon-7b", + "CohereLabs/command-a-vision-07-2025", + "deepseek-ai/deepseek-vl2-tiny", + "deepseek-ai/deepseek-vl2-small", + "deepseek-ai/deepseek-vl2", + "deepseek-ai/DeepSeek-OCR", + "baidu/ERNIE-4.5-VL-28B-A3B-PT", + "baidu/ERNIE-4.5-VL-424B-A47B-PT", + "adept/fuyu-8b", + "google/gemma-3-4b-it", + "google/gemma-3-27b-it", + "zai-org/glm-4v-9b", + "zai-org/cogagent-9b-20241220", + "zai-org/GLM-4.1V-9B-Thinking", + "zai-org/GLM-4.5V", + "ibm-granite/granite-speech-3.3-8b", + "h2oai/h2ovl-mississippi-800m", + "h2oai/h2ovl-mississippi-2b", + "HuggingFaceM4/Idefics3-8B-Llama3", + "internlm/Intern-S1", + "internlm/Intern-S1-mini", + "OpenGVLab/InternVL3_5-14B", + "OpenGVLab/InternVL3-9B", + "OpenGVLab/InternVideo2_5_Chat_8B", + "OpenGVLab/InternVL2_5-4B", + "OpenGVLab/Mono-InternVL-2B", + "OpenGVLab/InternVL2-4B", + "OpenGVLab/InternVL3-1B-hf", + "Kwai-Keye/Keye-VL-8B-Preview", + "Kwai-Keye/Keye-VL-1_5-8B", + "moonshotai/Kimi-VL-A3B-Instruct", + "moonshotai/Kimi-VL-A3B-Thinking", + "lightonai/LightOnOCR-1B", + "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1", + "llava-hf/llava-1.5-7b-hf", + "TIGER-Lab/Mantis-8B-siglip-llama3", + "mistral-community/pixtral-12b", + "llava-hf/llava-v1.6-mistral-7b-hf", + "llava-hf/llava-v1.6-vicuna-7b-hf", + "llava-hf/LLaVA-NeXT-Video-7B-hf", + "llava-hf/llava-onevision-qwen2-7b-ov-hf", + "llava-hf/llava-onevision-qwen2-0.5b-ov-hf", + "mispeech/midashenglm-7b", + "openbmb/MiniCPM-o-2_6", + "openbmb/MiniCPM-V-2", + "openbmb/MiniCPM-Llama3-V-2_5", + "openbmb/MiniCPM-V-2_6", + "openbmb/MiniCPM-V-4", + "openbmb/MiniCPM-V-4_5", + "MiniMaxAI/MiniMax-VL-01", + "mistralai/Mistral-Small-3.1-24B-Instruct-2503", + "allenai/Molmo-7B-D-0924", + "allenai/Molmo-7B-O-0924", + "nvidia/NVLM-D-72B", + "AIDC-AI/Ovis2-1B", + "AIDC-AI/Ovis1.6-Llama3.2-3B", + "AIDC-AI/Ovis2.5-9B", + "google/paligemma-3b-pt-224", + "google/paligemma-3b-mix-224", + "google/paligemma2-3b-ft-docci-448", + "microsoft/Phi-3-vision-128k-instruct", + "microsoft/Phi-3.5-vision-instruct", + "microsoft/Phi-4-multimodal-instruct", + "mistralai/Pixtral-12B-2409", + "Qwen/Qwen-VL", + "Qwen/Qwen-VL-Chat", + "Qwen/Qwen2-Audio-7B-Instruct", + "Qwen/QVQ-72B-Preview", + "Qwen/Qwen2-VL-7B-Instruct", + "Qwen/Qwen2-VL-72B-Instruct", + "Qwen/Qwen2.5-VL-3B-Instruct", + "Qwen/Qwen2.5-VL-72B-Instruct", + "Qwen/Qwen2.5-Omni-3B", + "Qwen/Qwen2.5-Omni-7B", + "Qwen/Qwen3-VL-4B-Instruct", + "Qwen/Qwen3-VL-30B-A3B-Instruct", + "Qwen/Qwen3-Omni-30B-A3B-Instruct", + "Qwen/Qwen3-Omni-30B-A3B-Thinking", + "YannQi/R-4B", + "Skywork/Skywork-R1V-38B", + "SmolVLM2-2.2B-Instruct", + "stepfun-ai/step3", + 
"omni-search/Tarsier-7b", + "omni-search/Tarsier-34b", + "omni-research/Tarsier2-Recap-7b", + "omni-research/Tarsier2-7b-0115" + ] + } + }, + "required": ["huggingfaceName"] + }, + "resources": { + "type": "object", + "description": "资源配置", + "properties": { + "gpuLimit": { + "type": "integer", + "description": "GPU 限制", + "default": 1, + "minimum": 1 + }, + "cpuRequest": { + "type": "integer", + "description": "CPU 请求", + "default": 12, + "minimum": 1 + }, + "memoryLimit": { + "type": "string", + "description": "内存限制", + "default": "16Gi", + "pattern": "^[0-9]+(\\.[0-9]+)?(Mi|Gi|Ti)$" + }, + "shmSize": { + "type": "string", + "description": "共享内存大小", + "default": "20Gi", + "pattern": "^[0-9]+(\\.[0-9]+)?(Mi|Gi|Ti)$" + } + } + }, + "workerSize": { + "type": "integer", + "description": "Worker 数量", + "default": 1, + "minimum": 1 + } + } +} diff --git a/vllm/vllm-serve/values.yaml b/vllm/vllm-serve/values.yaml index 49e85ae..d1cb815 100644 --- a/vllm/vllm-serve/values.yaml +++ b/vllm/vllm-serve/values.yaml @@ -39,14 +39,15 @@ resources: shmSize: "20Gi" svc: - type: NodePort + type: LoadBalancer port: 80 targetPort: 8080 - nodePort: 30080 + # nodePort: 30080 # vLLM 应用配置 vllm: image: "docker.io/vllm/vllm-openai:latest" +command: "" llama: image: "docker.io/library/one-click:v1" @@ -60,13 +61,13 @@ lmdeploy: nfs: server: "10.6.80.11" path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight" - storageClass: "local-path" + # storageClass: "local-path" pvSize: "500Gi" pvcSize: "50Gi" # LeaderWorkerSet 配置 replicaCount: 1 -workerSize: 2 +workerSize: 1 nodeSelector: {}