compatible with two clusters
@@ -3,12 +3,12 @@
   export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "vllm-serve.fullname" . }})
   export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
   echo http://$NODE_IP:$NODE_PORT
-{{- else if contains "LoadBalancer" .Values.service.type }}
+{{- else if contains "LoadBalancer" .Values.svc.type }}
   NOTE: It may take a few minutes for the LoadBalancer IP to be available.
         You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "vllm-serve.fullname" . }}'
   export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "vllm-serve.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
-  echo http://$SERVICE_IP:{{ .Values.service.port }}
-{{- else if contains "ClusterIP" .Values.service.type }}
+  echo http://$SERVICE_IP:{{ .Values.svc.port }}
+{{- else if contains "ClusterIP" .Values.svc.type }}
   export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "vllm-serve.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
   export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
   echo "Visit http://127.0.0.1:8080 to use your application"
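
The NOTES hunk above tracks the rename of the values key from service to svc, so the post-install notes read the same block of values.yaml as the Service template. A quick way to check the rendered notes for either cluster type (a sketch; the chart path and release name are examples):

  # helm install --dry-run renders NOTES.txt without touching the cluster
  helm install my-release ./vllm/vllm-serve --dry-run --set svc.type=NodePort | tail -n 20
  helm install my-release ./vllm/vllm-serve --dry-run --set svc.type=LoadBalancer | tail -n 20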
@@ -28,7 +28,7 @@ spec:
         - -c
         - |
           MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
-          DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
+          DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME"
           # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
           # Check whether the model already exists; download it if it does not
           echo "DEST_DIR= $DEST_DIR"
@@ -37,6 +37,7 @@ spec:
             echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
             wget https://hf-mirror.com/hfd/hfd.sh
             chmod a+x hfd.sh
+            apt update && apt upgrade
             apt install aria2 -y
             ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
             # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
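
For reference, this download step boils down to the following once Helm substitutes the values (a sketch; localMountPath=/models and the Qwen model are example values, not chart defaults):

  MODEL_NAME=$(basename "Qwen/Qwen2.5-0.5B-Instruct")   # -> Qwen2.5-0.5B-Instruct
  DEST_DIR="/models/Weight/$MODEL_NAME"
  wget https://hf-mirror.com/hfd/hfd.sh && chmod a+x hfd.sh
  apt update && apt install aria2 -y                    # hfd.sh uses aria2 for parallel downloads
  ./hfd.sh Qwen/Qwen2.5-0.5B-Instruct --local-dir "$DEST_DIR"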
@@ -74,7 +75,7 @@ spec:
         - sh
         - -c
         - "bash /vllm-workspace/examples/online_serving/multi-node-serving.sh leader --ray_cluster_size=$(LWS_GROUP_SIZE);
-           MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
+           MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
            python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
         resources:
           limits:
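
Once the leader has brought up the Ray cluster and the api_server, the pod serves the OpenAI-compatible HTTP API on port 8080. A smoke test against the exposed service (a sketch; $ENDPOINT is whatever NOTES.txt printed, and the model path is an example):

  curl http://$ENDPOINT/v1/models
  curl http://$ENDPOINT/v1/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "/models/Weight/Qwen2.5-0.5B-Instruct", "prompt": "Hello", "max_tokens": 16}'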
@@ -1,28 +1,40 @@
-apiVersion: v1
-kind: PersistentVolume
-metadata:
-  name: {{ .Release.Name }}-pv-model
-spec:
-  storageClassName: {{ .Values.nfs.storageClass | default "local-path" }}
-  capacity:
-    storage: {{ .Values.nfs.pvSize }}
-  accessModes:
-    - ReadWriteMany
-  persistentVolumeReclaimPolicy: Retain
-  nfs:
-    path: {{ .Values.nfs.path }}
-    server: {{ .Values.nfs.server }}
+#apiVersion: v1
+#kind: PersistentVolume
+#metadata:
+#  name: {{ .Release.Name }}-pv-model
+#spec:
+#  storageClassName: weight # {{ .Values.nfs.storageClass | default "local-path" }}
+#  capacity:
+#    storage: {{ .Values.nfs.pvSize }}
+#  accessModes:
+#    - ReadWriteMany
+#  persistentVolumeReclaimPolicy: Retain
+#  # nfs:
+#  #   path: {{ .Values.nfs.path }}
+#  #   server: {{ .Values.nfs.server }}
 ---
 
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
   name: {{ .Release.Name }}-pvc-model
-  annotations:
+  # annotations:
 spec:
   accessModes:
     - ReadWriteMany
   resources:
     requests:
       storage: {{ .Values.nfs.pvcSize }}
-  volumeName: {{ .Release.Name }}-pv-model
+  # volumeName: {{ .Release.Name }}-pv-model
+  storageClassName: weight
+
+#apiVersion: v1
+#kind: PersistentVolumeClaim
+#metadata:
+#  name: {{ .Release.Name }}-pvc-model
+#spec:
+#  accessModes:
+#    - ReadWriteMany
+#  resources:
+#    requests:
+#      storage: 20Gi
+#  storageClassName: nas-dataset
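
With the chart-managed NFS PersistentVolume commented out, the claim now relies on a StorageClass named weight that must already exist in the cluster. Before installing, it is worth confirming it is there and that the claim binds (release name is an example):

  kubectl get storageclass weight
  kubectl get pvc my-release-pvc-model      # expect STATUS: Bound after install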
@@ -18,7 +18,7 @@ kind: Service
 metadata:
   name: {{ .Release.Name }}-svc
 spec:
-  type: {{ .Values.svc.type | default "NodePort" }}
+  type: {{ .Values.svc.type | default "LoadBalancer" }}
   {{- if gt (int .Values.workerSize) 1 }}
   selector:
     leaderworkerset.sigs.k8s.io/name: {{ .Release.Name }}
@@ -29,7 +29,7 @@ spec:
   {{- end }}
   ports:
     - protocol: TCP
-      port: {{ .Values.svc.port | default 8080 }}
-      targetPort: {{ .Values.svc.port | default 8080 }}
-      nodePort: {{ .Values.svc.nodePort | default 30080 }}
+      port: {{ .Values.svc.port | default 80 }}
+      targetPort: {{ .Values.svc.targetPort | default 8080 }}
+      # nodePort: {{ .Values.svc.nodePort | default 30080 }}
 
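
With LoadBalancer as the default, the service now listens on port 80 and forwards to the container's 8080; the nodePort line is only commented out, so NodePort remains available as an override. To verify the mapping after install (a sketch; names are examples):

  kubectl get svc my-release-svc            # shows EXTERNAL-IP and 80:<nodePort>/TCP
  curl http://$EXTERNAL_IP/v1/models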
@@ -28,7 +28,7 @@ spec:
         - -c
         - |
           MODEL_NAME=$(basename "{{ .Values.model.huggingfaceName }}")
-          DEST_DIR="{{ .Values.model.localMountPath }}/$MODEL_NAME"
+          DEST_DIR="{{ .Values.model.localMountPath }}/Weight/$MODEL_NAME"
           # DEST_DIR="{{ .Values.model.localMountPath }}/{{ .Values.model.huggingfaceName }}"
           # Check whether the model already exists; download it if it does not
           echo "DEST_DIR= $DEST_DIR"
@@ -37,6 +37,7 @@ spec:
             echo "Downloading model {{ .Values.model.huggingfaceName }} to $DEST_DIR"
             wget https://hf-mirror.com/hfd/hfd.sh
             chmod a+x hfd.sh
+            apt update && apt upgrade
             apt install aria2 -y
             ./hfd.sh {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
             # huggingface-cli download {{ .Values.model.huggingfaceName }} --local-dir "$DEST_DIR"
@@ -58,9 +59,25 @@ spec:
         command:
         - sh
         - -c
-        - "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
-           echo 'Using single node ------------------------------------------';
-           python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
+        #args:
+        # {{- if .Values.command }}
+        # - {{ .Values.command | quote }}
+        # {{- else }}
+        # - |
+        #   MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}');
+        #   MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
+        #   # Note: variables are referenced here without wrapping the whole command block in quotes
+        #   python3 -m vllm.entrypoints.openai.api_server \
+        #     --port 8080 \
+        #     --model $MODEL_PATH \
+        #     --tensor-parallel-size {{ .Values.resources.gpuLimit }} \
+        #     --pipeline_parallel_size {{ .Values.workerSize }} \
+        #     --trust_remote_code
+        # {{- end }}
+        - "
+          MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}');
+          MODEL_PATH='{{ .Values.model.localMountPath }}/Weight/'$MODEL_NAME;
+          python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
         resources:
           limits:
             nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
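
A note on the launch flags in both command variants: vLLM's world size is tensor_parallel_size x pipeline_parallel_size, which this chart wires to resources.gpuLimit x workerSize, i.e. gpuLimit GPUs on each of workerSize nodes. With the new defaults (gpuLimit=1, workerSize=1) the rendered command reduces to a single-GPU launch (the model path shown is an example):

  python3 -m vllm.entrypoints.openai.api_server --port 8080 \
    --model /models/Weight/Qwen2.5-0.5B-Instruct \
    --tensor-parallel-size 1 --pipeline_parallel_size 1 --trust_remote_code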
@@ -92,7 +109,11 @@ spec:
             sizeLimit: {{ .Values.resources.shmSize }}
         - name: weight-volume
           persistentVolumeClaim:
-            claimName: {{ .Release.Name }}-pvc-model
+            claimName: {{ .Release.Name }}-pvc-model
+        # - name: weight-volume
+        #   nfs:
+        #     path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
+        #     server: "10.6.80.11"
       {{- with .Values.nodeSelector }}
       nodeSelector:
         {{- toYaml . | nindent 8 }}
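
Whichever volume variant is active, weight-volume must end up mounted at model.localMountPath so the server finds the weights the download step placed under Weight/. A quick check inside a running pod (a sketch; pod name and mount path are examples, and LeaderWorkerSet pod names depend on the release):

  kubectl describe pod my-release-0 | grep -A 2 weight-volume
  kubectl exec my-release-0 -- ls /models/Weight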
vllm/vllm-serve/values.schema.json (new file, 341 lines)
@@ -0,0 +1,341 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "vllm-serve Helm Chart Values",
+  "description": "Schema for vllm-serve Helm chart values",
+  "type": "object",
+  "properties": {
+    "model": {
+      "type": "object",
+      "description": "Model configuration",
+      "properties": {
+        "huggingfaceName": {
+          "type": "string",
+          "description": "HuggingFace model name",
+          "default": "Qwen/Qwen2.5-0.5B-Instruct",
+          "enum": [
+            "swiss-ai/Apertus-8B-2509",
+            "swiss-ai/Apertus-70B-Instruct-2509",
+            "BAAI/Aquila-7B",
+            "BAAI/AquilaChat-7B",
+            "arcee-ai/AFM-4.5B-Base",
+            "Snowflake/snowflake-arctic-base",
+            "Snowflake/snowflake-arctic-instruct",
+            "baichuan-inc/Baichuan2-13B-Chat",
+            "baichuan-inc/Baichuan-7B",
+            "inclusionAI/Ling-lite-1.5",
+            "inclusionAI/Ling-plus",
+            "inclusionAI/Ling-mini-2.0",
+            "ibm-ai-platform/Bamba-9B-fp8",
+            "ibm-ai-platform/Bamba-9B",
+            "bigscience/bloom",
+            "bigscience/bloomz",
+            "zai-org/chatglm2-6b",
+            "zai-org/chatglm3-6b",
+            "CohereLabs/c4ai-command-r-v01",
+            "CohereLabs/c4ai-command-r7b-12-2024",
+            "CohereLabs/c4ai-command-a-03-2025",
+            "CohereLabs/command-a-reasoning-08-2025",
+            "databricks/dbrx-base",
+            "databricks/dbrx-instruct",
+            "nvidia/Llama-3_3-Nemotron-Super-49B-v1",
+            "deepseek-ai/deepseek-llm-67b-base",
+            "deepseek-ai/deepseek-llm-7b-chat",
+            "deepseek-ai/DeepSeek-V2",
+            "deepseek-ai/DeepSeek-V2-Chat",
+            "deepseek-ai/DeepSeek-V3",
+            "deepseek-ai/DeepSeek-R1",
+            "deepseek-ai/DeepSeek-V3.1",
+            "rednote-hilab/dots.llm1.base",
+            "rednote-hilab/dots.llm1.inst",
+            "rednote-hilab/dots.ocr",
+            "baidu/ERNIE-4.5-0.3B-PT",
+            "baidu/ERNIE-4.5-21B-A3B-PT",
+            "baidu/ERNIE-4.5-300B-A47B-PT",
+            "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",
+            "LGAI-EXAONE/EXAONE-4.0-32B",
+            "mgleize/fairseq2-dummy-Llama-3.2-1B",
+            "tiiuae/falcon-7b",
+            "tiiuae/falcon-40b",
+            "tiiuae/falcon-rw-7b",
+            "tiiuae/falcon-mamba-7b",
+            "tiiuae/falcon-mamba-7b-instruct",
+            "tiiuae/Falcon-H1-34B-Base",
+            "tiiuae/Falcon-H1-34B-Instruct",
+            "allenai/FlexOlmo-7x7B-1T",
+            "allenai/FlexOlmo-7x7B-1T-RT",
+            "google/gemma-2b",
+            "google/gemma-1.1-2b-it",
+            "google/gemma-2-9b",
+            "google/gemma-2-27b",
+            "google/gemma-3-1b-it",
+            "google/gemma-3n-E2B-it",
+            "google/gemma-3n-E4B-it",
+            "zai-org/glm-4-9b-chat-hf",
+            "zai-org/GLM-4-32B-0414",
+            "zai-org/GLM-4.5",
+            "gpt2",
+            "gpt2-xl",
+            "bigcode/starcoder",
+            "bigcode/gpt_bigcode-santacoder",
+            "WizardLM/WizardCoder-15B-V1.0",
+            "EleutherAI/gpt-j-6b",
+            "nomic-ai/gpt4all-j",
+            "EleutherAI/gpt-neox-20b",
+            "EleutherAI/pythia-12b",
+            "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
+            "databricks/dolly-v2-12b",
+            "stabilityai/stablelm-tuned-alpha-7b",
+            "openai/gpt-oss-120b",
+            "openai/gpt-oss-20b",
+            "ibm-granite/granite-3.0-2b-base",
+            "ibm-granite/granite-3.1-8b-instruct",
+            "ibm/PowerLM-3b",
+            "ibm-granite/granite-3.0-1b-a400m-base",
+            "ibm-granite/granite-3.0-3b-a800m-instruct",
+            "ibm/PowerMoE-3b",
+            "ibm-granite/granite-4.0-tiny-preview",
+            "parasail-ai/GritLM-7B-vllm",
+            "hpcai-tech/grok-1",
+            "tencent/Hunyuan-7B-Instruct",
+            "tencent/Hunyuan-A13B-Instruct",
+            "tencent/Hunyuan-A13B-Pretrain",
+            "tencent/Hunyuan-A13B-Instruct-FP8",
+            "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B",
+            "internlm/internlm-7b",
+            "internlm/internlm-chat-7b",
+            "internlm/internlm2-7b",
+            "internlm/internlm2-chat-7b",
+            "internlm/internlm3-8b-instruct",
+            "inceptionai/jais-13b",
+            "inceptionai/jais-13b-chat",
+            "inceptionai/jais-30b-v3",
+            "inceptionai/jais-30b-chat-v3",
+            "ai21labs/AI21-Jamba-1.5-Large",
+            "ai21labs/AI21-Jamba-1.5-Mini",
+            "ai21labs/Jamba-v0.1",
+            "LiquidAI/LFM2-1.2B",
+            "LiquidAI/LFM2-700M",
+            "LiquidAI/LFM2-350M",
+            "LiquidAI/LFM2-8B-A1B-preview",
+            "meta-llama/Meta-Llama-3.1-405B-Instruct",
+            "meta-llama/Meta-Llama-3.1-70B",
+            "meta-llama/Meta-Llama-3-70B-Instruct",
+            "meta-llama/Llama-2-70b-hf",
+            "01-ai/Yi-34B",
+            "state-spaces/mamba-130m-hf",
+            "state-spaces/mamba-790m-hf",
+            "state-spaces/mamba-2.8b-hf",
+            "mistralai/Mamba-Codestral-7B-v0.1",
+            "XiaomiMiMo/MiMo-7B-RL",
+            "openbmb/MiniCPM-2B-sft-bf16",
+            "openbmb/MiniCPM-2B-dpo-bf16",
+            "openbmb/MiniCPM-S-1B-sft",
+            "openbmb/MiniCPM3-4B",
+            "MiniMaxAI/MiniMax-M2",
+            "mistralai/Mistral-7B-v0.1",
+            "mistralai/Mistral-7B-Instruct-v0.1",
+            "mistralai/Mixtral-8x7B-v0.1",
+            "mistralai/Mixtral-8x7B-Instruct-v0.1",
+            "mistral-community/Mixtral-8x22B-v0.1",
+            "mosaicml/mpt-7b",
+            "mosaicml/mpt-7b-storywriter",
+            "mosaicml/mpt-30b",
+            "nvidia/Minitron-8B-Base",
+            "mgoin/Nemotron-4-340B-Base-hf-FP8",
+            "nvidia/Nemotron-H-8B-Base-8K",
+            "nvidia/Nemotron-H-47B-Base-8K",
+            "nvidia/Nemotron-H-56B-Base-8K",
+            "allenai/OLMo-1B-hf",
+            "allenai/OLMo-7B-hf",
+            "allenai/OLMo-2-0425-1B",
+            "allenai/OLMoE-1B-7B-0924",
+            "allenai/OLMoE-1B-7B-0924-Instruct",
+            "facebook/opt-66b",
+            "facebook/opt-iml-max-30b",
+            "OrionStarAI/Orion-14B-Base",
+            "OrionStarAI/Orion-14B-Chat",
+            "microsoft/phi-1_5",
+            "microsoft/phi-2",
+            "microsoft/Phi-4-mini-instruct",
+            "microsoft/Phi-4",
+            "microsoft/Phi-3-mini-4k-instruct",
+            "microsoft/Phi-3-mini-128k-instruct",
+            "microsoft/Phi-3-medium-128k-instruct",
+            "microsoft/Phi-3.5-MoE-instruct",
+            "adept/persimmon-8b-base",
+            "adept/persimmon-8b-chat",
+            "pfnet/plamo-2-1b",
+            "pfnet/plamo-2-8b",
+            "Qwen/Qwen-7B",
+            "Qwen/Qwen-7B-Chat",
+            "Qwen/QwQ-32B-Preview",
+            "Qwen/Qwen2-7B-Instruct",
+            "Qwen/Qwen2-7B",
+            "Qwen/Qwen2.5-0.5B-Instruct",
+            "Qwen/Qwen1.5-MoE-A2.7B",
+            "Qwen/Qwen1.5-MoE-A2.7B-Chat",
+            "Qwen/Qwen3-8B",
+            "Qwen/Qwen3-30B-A3B",
+            "Qwen/Qwen3-Next-80B-A3B-Instruct",
+            "ByteDance-Seed/Seed-OSS-36B-Instruct",
+            "stabilityai/stablelm-3b-4e1t",
+            "stabilityai/stablelm-base-alpha-7b-v2",
+            "bigcode/starcoder2-3b",
+            "bigcode/starcoder2-7b",
+            "bigcode/starcoder2-15b",
+            "upstage/solar-pro-preview-instruct",
+            "Tele-AI/TeleChat2-3B",
+            "Tele-AI/TeleChat2-7B",
+            "Tele-AI/TeleChat2-35B",
+            "CofeAI/FLM-2-52B-Instruct-2407",
+            "CofeAI/Tele-FLM",
+            "xverse/XVERSE-7B-Chat",
+            "xverse/XVERSE-13B-Chat",
+            "xverse/XVERSE-65B-Chat",
+            "MiniMaxAI/MiniMax-M1-40k",
+            "MiniMaxAI/MiniMax-M1-80k",
+            "MiniMaxAI/MiniMax-Text-01",
+            "Zyphra/Zamba2-7B-instruct",
+            "Zyphra/Zamba2-2.7B-instruct",
+            "Zyphra/Zamba2-1.2B-instruct",
+            "meituan-longcat/LongCat-Flash-Chat",
+            "meituan-longcat/LongCat-Flash-Chat-FP8",
+            "rhymes-ai/Aria",
+            "CohereForAI/aya-vision-8b",
+            "CohereForAI/aya-vision-32b",
+            "Open-Bee/Bee-8B-RL",
+            "Open-Bee/Bee-8B-SFT",
+            "Salesforce/blip2-opt-2.7b",
+            "Salesforce/blip2-opt-6.7b",
+            "facebook/chameleon-7b",
+            "CohereLabs/command-a-vision-07-2025",
+            "deepseek-ai/deepseek-vl2-tiny",
+            "deepseek-ai/deepseek-vl2-small",
+            "deepseek-ai/deepseek-vl2",
+            "deepseek-ai/DeepSeek-OCR",
+            "baidu/ERNIE-4.5-VL-28B-A3B-PT",
+            "baidu/ERNIE-4.5-VL-424B-A47B-PT",
+            "adept/fuyu-8b",
+            "google/gemma-3-4b-it",
+            "google/gemma-3-27b-it",
+            "zai-org/glm-4v-9b",
+            "zai-org/cogagent-9b-20241220",
+            "zai-org/GLM-4.1V-9B-Thinking",
+            "zai-org/GLM-4.5V",
+            "ibm-granite/granite-speech-3.3-8b",
+            "h2oai/h2ovl-mississippi-800m",
+            "h2oai/h2ovl-mississippi-2b",
+            "HuggingFaceM4/Idefics3-8B-Llama3",
+            "internlm/Intern-S1",
+            "internlm/Intern-S1-mini",
+            "OpenGVLab/InternVL3_5-14B",
+            "OpenGVLab/InternVL3-9B",
+            "OpenGVLab/InternVideo2_5_Chat_8B",
+            "OpenGVLab/InternVL2_5-4B",
+            "OpenGVLab/Mono-InternVL-2B",
+            "OpenGVLab/InternVL2-4B",
+            "OpenGVLab/InternVL3-1B-hf",
+            "Kwai-Keye/Keye-VL-8B-Preview",
+            "Kwai-Keye/Keye-VL-1_5-8B",
+            "moonshotai/Kimi-VL-A3B-Instruct",
+            "moonshotai/Kimi-VL-A3B-Thinking",
+            "lightonai/LightOnOCR-1B",
+            "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+            "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+            "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
+            "nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1",
+            "llava-hf/llava-1.5-7b-hf",
+            "TIGER-Lab/Mantis-8B-siglip-llama3",
+            "mistral-community/pixtral-12b",
+            "llava-hf/llava-v1.6-mistral-7b-hf",
+            "llava-hf/llava-v1.6-vicuna-7b-hf",
+            "llava-hf/LLaVA-NeXT-Video-7B-hf",
+            "llava-hf/llava-onevision-qwen2-7b-ov-hf",
+            "llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
+            "mispeech/midashenglm-7b",
+            "openbmb/MiniCPM-o-2_6",
+            "openbmb/MiniCPM-V-2",
+            "openbmb/MiniCPM-Llama3-V-2_5",
+            "openbmb/MiniCPM-V-2_6",
+            "openbmb/MiniCPM-V-4",
+            "openbmb/MiniCPM-V-4_5",
+            "MiniMaxAI/MiniMax-VL-01",
+            "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
+            "allenai/Molmo-7B-D-0924",
+            "allenai/Molmo-7B-O-0924",
+            "nvidia/NVLM-D-72B",
+            "AIDC-AI/Ovis2-1B",
+            "AIDC-AI/Ovis1.6-Llama3.2-3B",
+            "AIDC-AI/Ovis2.5-9B",
+            "google/paligemma-3b-pt-224",
+            "google/paligemma-3b-mix-224",
+            "google/paligemma2-3b-ft-docci-448",
+            "microsoft/Phi-3-vision-128k-instruct",
+            "microsoft/Phi-3.5-vision-instruct",
+            "microsoft/Phi-4-multimodal-instruct",
+            "mistralai/Pixtral-12B-2409",
+            "Qwen/Qwen-VL",
+            "Qwen/Qwen-VL-Chat",
+            "Qwen/Qwen2-Audio-7B-Instruct",
+            "Qwen/QVQ-72B-Preview",
+            "Qwen/Qwen2-VL-7B-Instruct",
+            "Qwen/Qwen2-VL-72B-Instruct",
+            "Qwen/Qwen2.5-VL-3B-Instruct",
+            "Qwen/Qwen2.5-VL-72B-Instruct",
+            "Qwen/Qwen2.5-Omni-3B",
+            "Qwen/Qwen2.5-Omni-7B",
+            "Qwen/Qwen3-VL-4B-Instruct",
+            "Qwen/Qwen3-VL-30B-A3B-Instruct",
+            "Qwen/Qwen3-Omni-30B-A3B-Instruct",
+            "Qwen/Qwen3-Omni-30B-A3B-Thinking",
+            "YannQi/R-4B",
+            "Skywork/Skywork-R1V-38B",
+            "SmolVLM2-2.2B-Instruct",
+            "stepfun-ai/step3",
+            "omni-search/Tarsier-7b",
+            "omni-search/Tarsier-34b",
+            "omni-research/Tarsier2-Recap-7b",
+            "omni-research/Tarsier2-7b-0115"
+          ]
+        }
+      },
+      "required": ["huggingfaceName"]
+    },
+    "resources": {
+      "type": "object",
+      "description": "Resource configuration",
+      "properties": {
+        "gpuLimit": {
+          "type": "integer",
+          "description": "GPU limit",
+          "default": 1,
+          "minimum": 1
+        },
+        "cpuRequest": {
+          "type": "integer",
+          "description": "CPU request",
+          "default": 12,
+          "minimum": 1
+        },
+        "memoryLimit": {
+          "type": "string",
+          "description": "Memory limit",
+          "default": "16Gi",
+          "pattern": "^[0-9]+(\\.[0-9]+)?(Mi|Gi|Ti)$"
+        },
+        "shmSize": {
+          "type": "string",
+          "description": "Shared memory size",
+          "default": "20Gi",
+          "pattern": "^[0-9]+(\\.[0-9]+)?(Mi|Gi|Ti)$"
+        }
+      }
+    },
+    "workerSize": {
+      "type": "integer",
+      "description": "Number of workers",
+      "default": 1,
+      "minimum": 1
+    }
+  }
+}
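
Helm validates values.yaml and any --set overrides against values.schema.json on lint, template, install, and upgrade, so a model name outside the enum now fails before anything reaches the cluster:

  helm lint ./vllm/vllm-serve
  helm install my-release ./vllm/vllm-serve --set model.huggingfaceName=Qwen/Qwen3-8B       # in the enum: passes
  helm install my-release ./vllm/vllm-serve --set model.huggingfaceName=not/a-real-model    # rejected by the schema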
@@ -39,14 +39,15 @@ resources:
   shmSize: "20Gi"
 
 svc:
-  type: NodePort
+  type: LoadBalancer
   port: 80
   targetPort: 8080
-  nodePort: 30080
+  # nodePort: 30080
 
 # vLLM application configuration
 vllm:
   image: "docker.io/vllm/vllm-openai:latest"
 
+  command: ""
 
 llama:
   image: "docker.io/library/one-click:v1"
@@ -60,13 +61,13 @@ lmdeploy:
 nfs:
   server: "10.6.80.11"
   path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
-  storageClass: "local-path"
+  # storageClass: "local-path"
   pvSize: "500Gi"
   pvcSize: "50Gi"
 
 # LeaderWorkerSet configuration
 replicaCount: 1
-workerSize: 2
+workerSize: 1
 
 nodeSelector: {}
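
Taken together with the template changes above, these defaults are what make the chart usable on both clusters: the cluster with a LoadBalancer provider installs with no overrides, and the bare cluster flips the service type back at install time (release name is an example):

  # Cluster with a LoadBalancer provider: defaults apply
  helm install vllm ./vllm/vllm-serve
  # Cluster without one: override per install; the template's nodePort line is
  # commented out, so Kubernetes assigns the node port
  helm install vllm ./vllm/vllm-serve --set svc.type=NodePort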