helm-charts/vllm-serve/values.schema.json

{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "vllm-serve Helm Chart Values",
  "description": "Schema for vllm-serve Helm chart values",
  "type": "object",
  "properties": {
    "model": {
      "type": "object",
      "description": "模型配置",
      "properties": {
        "huggingfaceName": {
          "type": "string",
          "description": "HuggingFace 模型名称",
          "default": "Qwen/Qwen2.5-0.5B-Instruct",
          "enum": [
            "swiss-ai/Apertus-8B-2509",
            "swiss-ai/Apertus-70B-Instruct-2509",
            "BAAI/Aquila-7B",
            "BAAI/AquilaChat-7B",
            "arcee-ai/AFM-4.5B-Base",
            "Snowflake/snowflake-arctic-base",
            "Snowflake/snowflake-arctic-instruct",
            "baichuan-inc/Baichuan2-13B-Chat",
            "baichuan-inc/Baichuan-7B",
            "inclusionAI/Ling-lite-1.5",
            "inclusionAI/Ling-plus",
            "inclusionAI/Ling-mini-2.0",
            "ibm-ai-platform/Bamba-9B-fp8",
            "ibm-ai-platform/Bamba-9B",
            "bigscience/bloom",
            "bigscience/bloomz",
            "zai-org/chatglm2-6b",
            "zai-org/chatglm3-6b",
            "CohereLabs/c4ai-command-r-v01",
            "CohereLabs/c4ai-command-r7b-12-2024",
            "CohereLabs/c4ai-command-a-03-2025",
            "CohereLabs/command-a-reasoning-08-2025",
            "databricks/dbrx-base",
            "databricks/dbrx-instruct",
            "nvidia/Llama-3_3-Nemotron-Super-49B-v1",
            "deepseek-ai/deepseek-llm-67b-base",
            "deepseek-ai/deepseek-llm-7b-chat",
            "deepseek-ai/DeepSeek-V2",
            "deepseek-ai/DeepSeek-V2-Chat",
            "deepseek-ai/DeepSeek-V3",
            "deepseek-ai/DeepSeek-R1",
            "deepseek-ai/DeepSeek-V3.1",
            "rednote-hilab/dots.llm1.base",
            "rednote-hilab/dots.llm1.inst",
            "rednote-hilab/dots.ocr",
            "baidu/ERNIE-4.5-0.3B-PT",
            "baidu/ERNIE-4.5-21B-A3B-PT",
            "baidu/ERNIE-4.5-300B-A47B-PT",
            "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",
            "LGAI-EXAONE/EXAONE-4.0-32B",
            "mgleize/fairseq2-dummy-Llama-3.2-1B",
            "tiiuae/falcon-7b",
            "tiiuae/falcon-40b",
            "tiiuae/falcon-rw-7b",
            "tiiuae/falcon-mamba-7b",
            "tiiuae/falcon-mamba-7b-instruct",
            "tiiuae/Falcon-H1-34B-Base",
            "tiiuae/Falcon-H1-34B-Instruct",
            "allenai/FlexOlmo-7x7B-1T",
            "allenai/FlexOlmo-7x7B-1T-RT",
            "google/gemma-2b",
            "google/gemma-1.1-2b-it",
            "google/gemma-2-9b",
            "google/gemma-2-27b",
            "google/gemma-3-1b-it",
            "google/gemma-3n-E2B-it",
            "google/gemma-3n-E4B-it",
            "zai-org/glm-4-9b-chat-hf",
            "zai-org/GLM-4-32B-0414",
            "zai-org/GLM-4.5",
            "gpt2",
            "gpt2-xl",
            "bigcode/starcoder",
            "bigcode/gpt_bigcode-santacoder",
            "WizardLM/WizardCoder-15B-V1.0",
            "EleutherAI/gpt-j-6b",
            "nomic-ai/gpt4all-j",
            "EleutherAI/gpt-neox-20b",
            "EleutherAI/pythia-12b",
            "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
            "databricks/dolly-v2-12b",
            "stabilityai/stablelm-tuned-alpha-7b",
            "openai/gpt-oss-120b",
            "openai/gpt-oss-20b",
            "ibm-granite/granite-3.0-2b-base",
            "ibm-granite/granite-3.1-8b-instruct",
            "ibm/PowerLM-3b",
            "ibm-granite/granite-3.0-1b-a400m-base",
            "ibm-granite/granite-3.0-3b-a800m-instruct",
            "ibm/PowerMoE-3b",
            "ibm-granite/granite-4.0-tiny-preview",
            "parasail-ai/GritLM-7B-vllm",
            "hpcai-tech/grok-1",
            "tencent/Hunyuan-7B-Instruct",
            "tencent/Hunyuan-A13B-Instruct",
            "tencent/Hunyuan-A13B-Pretrain",
            "tencent/Hunyuan-A13B-Instruct-FP8",
            "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B",
            "internlm/internlm-7b",
            "internlm/internlm-chat-7b",
            "internlm/internlm2-7b",
            "internlm/internlm2-chat-7b",
            "internlm/internlm3-8b-instruct",
            "inceptionai/jais-13b",
            "inceptionai/jais-13b-chat",
            "inceptionai/jais-30b-v3",
            "inceptionai/jais-30b-chat-v3",
            "ai21labs/AI21-Jamba-1.5-Large",
            "ai21labs/AI21-Jamba-1.5-Mini",
            "ai21labs/Jamba-v0.1",
            "LiquidAI/LFM2-1.2B",
            "LiquidAI/LFM2-700M",
            "LiquidAI/LFM2-350M",
            "LiquidAI/LFM2-8B-A1B-preview",
            "meta-llama/Meta-Llama-3.1-405B-Instruct",
            "meta-llama/Meta-Llama-3.1-70B",
            "meta-llama/Meta-Llama-3-70B-Instruct",
            "meta-llama/Llama-2-70b-hf",
            "01-ai/Yi-34B",
            "state-spaces/mamba-130m-hf",
            "state-spaces/mamba-790m-hf",
            "state-spaces/mamba-2.8b-hf",
            "mistralai/Mamba-Codestral-7B-v0.1",
            "XiaomiMiMo/MiMo-7B-RL",
            "openbmb/MiniCPM-2B-sft-bf16",
            "openbmb/MiniCPM-2B-dpo-bf16",
            "openbmb/MiniCPM-S-1B-sft",
            "openbmb/MiniCPM3-4B",
            "MiniMaxAI/MiniMax-M2",
            "mistralai/Mistral-7B-v0.1",
            "mistralai/Mistral-7B-Instruct-v0.1",
            "mistralai/Mixtral-8x7B-v0.1",
            "mistralai/Mixtral-8x7B-Instruct-v0.1",
            "mistral-community/Mixtral-8x22B-v0.1",
            "mosaicml/mpt-7b",
            "mosaicml/mpt-7b-storywriter",
            "mosaicml/mpt-30b",
            "nvidia/Minitron-8B-Base",
            "mgoin/Nemotron-4-340B-Base-hf-FP8",
            "nvidia/Nemotron-H-8B-Base-8K",
            "nvidia/Nemotron-H-47B-Base-8K",
            "nvidia/Nemotron-H-56B-Base-8K",
            "allenai/OLMo-1B-hf",
            "allenai/OLMo-7B-hf",
            "allenai/OLMo-2-0425-1B",
            "allenai/OLMoE-1B-7B-0924",
            "allenai/OLMoE-1B-7B-0924-Instruct",
            "facebook/opt-66b",
            "facebook/opt-iml-max-30b",
            "OrionStarAI/Orion-14B-Base",
            "OrionStarAI/Orion-14B-Chat",
            "microsoft/phi-1_5",
            "microsoft/phi-2",
            "microsoft/Phi-4-mini-instruct",
            "microsoft/Phi-4",
            "microsoft/Phi-3-mini-4k-instruct",
            "microsoft/Phi-3-mini-128k-instruct",
            "microsoft/Phi-3-medium-128k-instruct",
            "microsoft/Phi-3.5-MoE-instruct",
            "adept/persimmon-8b-base",
            "adept/persimmon-8b-chat",
            "pfnet/plamo-2-1b",
            "pfnet/plamo-2-8b",
            "Qwen/Qwen-7B",
            "Qwen/Qwen-7B-Chat",
            "Qwen/QwQ-32B-Preview",
            "Qwen/Qwen2-7B-Instruct",
            "Qwen/Qwen2-7B",
            "Qwen/Qwen2.5-0.5B-Instruct",
            "Qwen/Qwen1.5-MoE-A2.7B",
            "Qwen/Qwen1.5-MoE-A2.7B-Chat",
            "Qwen/Qwen3-8B",
            "Qwen/Qwen3-30B-A3B",
            "Qwen/Qwen3-Next-80B-A3B-Instruct",
            "ByteDance-Seed/Seed-OSS-36B-Instruct",
            "stabilityai/stablelm-3b-4e1t",
            "stabilityai/stablelm-base-alpha-7b-v2",
            "bigcode/starcoder2-3b",
            "bigcode/starcoder2-7b",
            "bigcode/starcoder2-15b",
            "upstage/solar-pro-preview-instruct",
            "Tele-AI/TeleChat2-3B",
            "Tele-AI/TeleChat2-7B",
            "Tele-AI/TeleChat2-35B",
            "CofeAI/FLM-2-52B-Instruct-2407",
            "CofeAI/Tele-FLM",
            "xverse/XVERSE-7B-Chat",
            "xverse/XVERSE-13B-Chat",
            "xverse/XVERSE-65B-Chat",
            "MiniMaxAI/MiniMax-M1-40k",
            "MiniMaxAI/MiniMax-M1-80k",
            "MiniMaxAI/MiniMax-Text-01",
            "Zyphra/Zamba2-7B-instruct",
            "Zyphra/Zamba2-2.7B-instruct",
            "Zyphra/Zamba2-1.2B-instruct",
            "meituan-longcat/LongCat-Flash-Chat",
            "meituan-longcat/LongCat-Flash-Chat-FP8",
            "rhymes-ai/Aria",
            "CohereForAI/aya-vision-8b",
            "CohereForAI/aya-vision-32b",
            "Open-Bee/Bee-8B-RL",
            "Open-Bee/Bee-8B-SFT",
            "Salesforce/blip2-opt-2.7b",
            "Salesforce/blip2-opt-6.7b",
            "facebook/chameleon-7b",
            "CohereLabs/command-a-vision-07-2025",
            "deepseek-ai/deepseek-vl2-tiny",
            "deepseek-ai/deepseek-vl2-small",
            "deepseek-ai/deepseek-vl2",
            "deepseek-ai/DeepSeek-OCR",
            "baidu/ERNIE-4.5-VL-28B-A3B-PT",
            "baidu/ERNIE-4.5-VL-424B-A47B-PT",
            "adept/fuyu-8b",
            "google/gemma-3-4b-it",
            "google/gemma-3-27b-it",
            "zai-org/glm-4v-9b",
            "zai-org/cogagent-9b-20241220",
            "zai-org/GLM-4.1V-9B-Thinking",
            "zai-org/GLM-4.5V",
            "ibm-granite/granite-speech-3.3-8b",
            "h2oai/h2ovl-mississippi-800m",
            "h2oai/h2ovl-mississippi-2b",
            "HuggingFaceM4/Idefics3-8B-Llama3",
            "internlm/Intern-S1",
            "internlm/Intern-S1-mini",
            "OpenGVLab/InternVL3_5-14B",
            "OpenGVLab/InternVL3-9B",
            "OpenGVLab/InternVideo2_5_Chat_8B",
            "OpenGVLab/InternVL2_5-4B",
            "OpenGVLab/Mono-InternVL-2B",
            "OpenGVLab/InternVL2-4B",
            "OpenGVLab/InternVL3-1B-hf",
            "Kwai-Keye/Keye-VL-8B-Preview",
            "Kwai-Keye/Keye-VL-1_5-8B",
            "moonshotai/Kimi-VL-A3B-Instruct",
            "moonshotai/Kimi-VL-A3B-Thinking",
            "lightonai/LightOnOCR-1B",
            "meta-llama/Llama-4-Scout-17B-16E-Instruct",
            "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
            "meta-llama/Llama-4-Maverick-17B-128E-Instruct",
            "nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1",
            "llava-hf/llava-1.5-7b-hf",
            "TIGER-Lab/Mantis-8B-siglip-llama3",
            "mistral-community/pixtral-12b",
            "llava-hf/llava-v1.6-mistral-7b-hf",
            "llava-hf/llava-v1.6-vicuna-7b-hf",
            "llava-hf/LLaVA-NeXT-Video-7B-hf",
            "llava-hf/llava-onevision-qwen2-7b-ov-hf",
            "llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
            "mispeech/midashenglm-7b",
            "openbmb/MiniCPM-o-2_6",
            "openbmb/MiniCPM-V-2",
            "openbmb/MiniCPM-Llama3-V-2_5",
            "openbmb/MiniCPM-V-2_6",
            "openbmb/MiniCPM-V-4",
            "openbmb/MiniCPM-V-4_5",
            "MiniMaxAI/MiniMax-VL-01",
            "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
            "allenai/Molmo-7B-D-0924",
            "allenai/Molmo-7B-O-0924",
            "nvidia/NVLM-D-72B",
            "AIDC-AI/Ovis2-1B",
            "AIDC-AI/Ovis1.6-Llama3.2-3B",
            "AIDC-AI/Ovis2.5-9B",
            "google/paligemma-3b-pt-224",
            "google/paligemma-3b-mix-224",
            "google/paligemma2-3b-ft-docci-448",
            "microsoft/Phi-3-vision-128k-instruct",
            "microsoft/Phi-3.5-vision-instruct",
            "microsoft/Phi-4-multimodal-instruct",
            "mistralai/Pixtral-12B-2409",
            "Qwen/Qwen-VL",
            "Qwen/Qwen-VL-Chat",
            "Qwen/Qwen2-Audio-7B-Instruct",
            "Qwen/QVQ-72B-Preview",
            "Qwen/Qwen2-VL-7B-Instruct",
            "Qwen/Qwen2-VL-72B-Instruct",
            "Qwen/Qwen2.5-VL-3B-Instruct",
            "Qwen/Qwen2.5-VL-72B-Instruct",
            "Qwen/Qwen2.5-Omni-3B",
            "Qwen/Qwen2.5-Omni-7B",
            "Qwen/Qwen3-VL-4B-Instruct",
            "Qwen/Qwen3-VL-30B-A3B-Instruct",
            "Qwen/Qwen3-Omni-30B-A3B-Instruct",
            "Qwen/Qwen3-Omni-30B-A3B-Thinking",
            "YannQi/R-4B",
            "Skywork/Skywork-R1V-38B",
            "SmolVLM2-2.2B-Instruct",
            "stepfun-ai/step3",
            "omni-search/Tarsier-7b",
            "omni-search/Tarsier-34b",
            "omni-research/Tarsier2-Recap-7b",
            "omni-research/Tarsier2-7b-0115",
            "ByteDance/Dolphin",
            "ByteDance/Sa2VA-1B",
            "ByteDance/Sa2VA-4B",
            "ByteDance/Sa2VA-InternVL3-8B",
            "ByteDance/Sa2VA-Qwen3-VL-4B",
            "HuggingFaceH4/zephyr-7b-alpha",
            "HuggingFaceH4/zephyr-7b-beta",
            "Qwen/QwQ-32B",
            "Qwen/QwQ-32B-AWQ",
            "Qwen/QwQ-32B-GGUF",
            "Qwen/Qwen-1_8B-Chat",
            "Qwen/Qwen-Audio",
            "Qwen/Qwen-Audio-Chat",
            "Qwen/Qwen2-VL-2B-Instruct",
            "Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4",
            "Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8",
            "Qwen/Qwen3-14B",
            "Qwen/Qwen3-14B-AWQ",
            "Qwen/Qwen3-14B-Base",
            "Qwen/Qwen3-235B-A22B",
            "Qwen/Qwen3-235B-A22B-Instruct-2507",
            "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8",
            "Qwen/Qwen3-235B-A22B-Thinking-2507",
            "Qwen/Qwen3-30B-A3B-Instruct-2507",
            "Qwen/Qwen3-30B-A3B-Thinking-2507",
            "Qwen/Qwen3-32B",
            "Qwen/Qwen3-32B-FP8",
            "Qwen/Qwen3-32B-GGUF",
            "Qwen/Qwen3-4B",
            "Qwen/Qwen3-4B-Base",
            "Qwen/Qwen3-4B-FP8",
            "Qwen/Qwen3-4B-GGUF",
            "Qwen/Qwen3-4B-Instruct-2507",
            "Qwen/Qwen3-4B-Instruct-2507-FP8",
            "Qwen/Qwen3-4B-SafeRL",
            "Qwen/Qwen3-4B-Thinking-2507",
            "Qwen/Qwen3-4B-Thinking-2507-FP8",
            "Qwen/Qwen3-8B-AWQ",
            "Qwen/Qwen3-8B-Base",
            "Qwen/Qwen3-8B-FP8",
            "Qwen/Qwen3-8B-GGUF",
            "Qwen/Qwen3-Coder-30B-A3B-Instruct",
            "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8",
            "Qwen/Qwen3-Coder-480B-A35B-Instruct",
            "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8",
            "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8",
            "Qwen/Qwen3-Next-80B-A3B-Thinking",
            "Qwen/Qwen3-VL-2B-Instruct",
            "Qwen/Qwen3-VL-2B-Instruct-FP8",
            "Qwen/Qwen3-VL-2B-Instruct-GGUF",
            "Qwen/Qwen3-VL-2B-Thinking",
            "Qwen/Qwen3-VL-2B-Thinking-GGUF",
            "Qwen/Qwen3-VL-30B-A3B-Instruct-GGUF",
            "Qwen/Qwen3-VL-32B-Instruct",
            "Qwen/Qwen3-VL-32B-Instruct-FP8",
            "Qwen/Qwen3-VL-32B-Instruct-GGUF",
            "Qwen/Qwen3-VL-32B-Thinking",
            "Qwen/Qwen3-VL-4B-Instruct-GGUF",
            "Qwen/Qwen3-VL-4B-Thinking",
            "Qwen/Qwen3-VL-8B-Instruct",
            "Qwen/Qwen3-VL-8B-Instruct-GGUF",
            "Qwen/Qwen3-VL-8B-Thinking-GGUF",
            "Qwen/Qwen3Guard-Gen-4B",
            "Qwen/Qwen3Guard-Gen-8B",
            "Skywork/Skywork-SWE-32B",
            "baichuan-inc/Baichuan-M2-32B",
            "deepseek-ai/DeepSeek-Coder-V2-Instruct",
            "deepseek-ai/DeepSeek-Coder-V2-Lite-Base",
            "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
            "deepseek-ai/DeepSeek-Prover-V2-671B",
            "deepseek-ai/DeepSeek-R1-0528",
            "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
            "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
            "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
            "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
            "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
            "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
            "deepseek-ai/DeepSeek-V2-Lite-Chat",
            "deepseek-ai/DeepSeek-V3-0324",
            "deepseek-ai/deepseek-llm-67b-chat",
            "deepseek-ai/deepseek-llm-7b-base",
            "deepseek-ai/deepseek-moe-16b-base",
            "deepseek-ai/deepseek-vl-7b-chat",
            "google/datagemma-rag-27b-it",
            "google/gemma-2-27b-it",
            "google/gemma-2-2b",
            "google/gemma-2-2b-it",
            "google/gemma-2-9b-it",
            "google/gemma-2b-it",
            "google/gemma-3-12b-it",
            "google/gemma-3-12b-it-qat-q4_0-gguf",
            "google/gemma-3-12b-pt",
            "google/gemma-3-1b-it-qat-int4-unquantized",
            "google/gemma-3-1b-it-qat-q4_0-gguf",
            "google/gemma-3-1b-pt",
            "google/gemma-3-270m",
            "google/gemma-3-270m-it",
            "google/gemma-3-4b-it-qat-q4_0-gguf",
            "google/gemma-3-4b-pt",
            "google/gemma-3n-E2B",
            "google/gemma-3n-E4B",
            "google/gemma-7b",
            "google/gemma-7b-it",
            "google/medgemma-27b-it",
            "google/medgemma-27b-text-it",
            "google/medgemma-4b-it",
            "google/medgemma-4b-pt",
            "google/paligemma2-3b-mix-224",
            "google/paligemma2-3b-mix-448",
            "google/shieldgemma-2-4b-it",
            "google/shieldgemma-2b",
            "google/txgemma-2b-predict",
            "google/vaultgemma-1b",
            "meta-llama/CodeLlama-70b-hf",
            "meta-llama/Llama-2-13b-hf",
            "meta-llama/Llama-2-70b-chat-hf",
            "meta-llama/Llama-2-7b-chat-hf",
            "meta-llama/Llama-2-7b-hf",
            "meta-llama/Llama-4-Scout-17B-16E",
            "meta-llama/Llama-Guard-3-8B",
            "meta-llama/Llama-Guard-4-12B",
            "meta-llama/Meta-Llama-3-8B",
            "meta-llama/Meta-Llama-3-8B-Instruct",
            "meta-llama/Meta-Llama-Guard-2-8B",
            "microsoft/BioGPT-Large",
            "microsoft/DialoGPT-medium",
            "microsoft/DialoGPT-small",
            "microsoft/Florence-2-base",
            "microsoft/Florence-2-large",
            "microsoft/Florence-2-large-ft",
            "microsoft/GUI-Actor-7B-Qwen2-VL",
            "microsoft/MAI-DS-R1",
            "microsoft/MAI-DS-R1-FP8",
            "microsoft/MediPhi-Clinical",
            "microsoft/OmniParser",
            "microsoft/Phi-3-mini-4k-instruct-gguf",
            "microsoft/Phi-3-mini-4k-instruct-onnx",
            "microsoft/Phi-4-mini-flash-reasoning",
            "microsoft/llava-med-7b-delta",
            "microsoft/phi-4",
            "microsoft/phi-4-gguf",
            "tencent/DRIVE-RL",
            "tencent/DRIVE-SFT",
            "Qwen/Qwen2.5-14B",
            "BAAI/bge-base-zh-v1.5",
            "01-ai/Yi-1.5-34B",
            "01-ai/Yi-1.5-6B",
            "01-ai/Yi-1.5-9B",
            "01-ai/Yi-34B-Chat",
            "ByteDance/Dolphin-1.5",
            "ByteDance/Ouro-1.4B",
            "ByteDance/Ouro-1.4B-Thinking",
            "ByteDance/Ouro-2.6B",
            "ByteDance/Ouro-2.6B-Thinking",
            "OpenGVLab/InternVL3-38B",
            "OpenGVLab/InternVL3-78B",
            "OpenGVLab/InternVL3_5-1B",
            "OpenGVLab/InternVL3_5-30B-A3B-Instruct",
            "OpenGVLab/InternVL3_5-38B",
            "OpenGVLab/InternVL3_5-8B",
            "OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview",
            "OpenGVLab/Mini-InternVL2-2B-DA-Medical",
            "OpenGVLab/SDLM-32B-D4",
            "Qwen/CodeQwen1.5-7B-Chat",
            "Qwen/Qwen1.5-1.8B",
            "Qwen/Qwen1.5-MoE-A2.7B-Chat-GPTQ-Int4",
            "Qwen/Qwen2-0.5B",
            "Qwen/Qwen2-Math-72B-Instruct",
            "Qwen/Qwen2.5-0.5B",
            "Qwen/Qwen2.5-1.5B",
            "Qwen/Qwen2.5-1.5B-Instruct",
            "Qwen/Qwen2.5-1.5B-Instruct-GGUF",
            "Qwen/Qwen2.5-14B-Instruct",
            "Qwen/Qwen2.5-14B-Instruct-1M",
            "Qwen/Qwen2.5-32B-Instruct",
            "Qwen/Qwen2.5-32B-Instruct-AWQ",
            "Qwen/Qwen2.5-32B-Instruct-GPTQ-Int4",
            "Qwen/Qwen2.5-3B-Instruct",
            "Qwen/Qwen2.5-3B-Instruct-GGUF",
            "Qwen/Qwen2.5-72B-Instruct",
            "Qwen/Qwen2.5-7B",
            "Qwen/Qwen2.5-7B-Instruct",
            "Qwen/Qwen2.5-7B-Instruct-1M",
            "Qwen/Qwen2.5-7B-Instruct-GGUF",
            "Qwen/Qwen2.5-Coder-0.5B",
            "Qwen/Qwen2.5-Coder-1.5B",
            "Qwen/Qwen2.5-Coder-1.5B-Instruct",
            "Qwen/Qwen2.5-Coder-14B-Instruct",
            "Qwen/Qwen2.5-Coder-32B-Instruct",
            "Qwen/Qwen2.5-Coder-3B-Instruct-GGUF",
            "Qwen/Qwen2.5-Coder-7B-Instruct",
            "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
            "Qwen/Qwen2.5-Math-72B",
            "Qwen/Qwen2.5-Math-7B",
            "Qwen/Qwen2.5-VL-32B-Instruct",
            "Qwen/Qwen2.5-VL-72B-Instruct-AWQ",
            "Qwen/Qwen2.5-VL-7B-Instruct",
            "Qwen/Qwen3-0.6B",
            "Qwen/Qwen3-0.6B-Base",
            "Qwen/Qwen3-1.7B",
            "Qwen/Qwen3-1.7B-FP8",
            "Qwen/Qwen3-14B-FP8",
            "Qwen/Qwen3-14B-GGUF",
            "Qwen/Qwen3-30B-A3B-Base",
            "Qwen/Qwen3-30B-A3B-Thinking-2507-FP8",
            "Qwen/Qwen3-Next-80B-A3B-Thinking-FP8",
            "Qwen/Qwen3-VL-4B-Instruct-FP8",
            "baichuan-inc/Baichuan-M2-32B-GPTQ-Int4",
            "baidu/ERNIE-4.5-0.3B-Base-PT",
            "baidu/ERNIE-4.5-21B-A3B-Base-PT",
            "baidu/ERNIE-4.5-VL-28B-A3B-Base-PT",
            "baidu/ERNIE-4.5-VL-28B-A3B-Thinking",
            "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT",
            "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
            "deepseek-ai/DeepSeek-V3.1-Terminus",
            "deepseek-ai/DeepSeek-V3.2-Exp",
            "deepseek-ai/deepseek-coder-1.3b-base",
            "deepseek-ai/deepseek-coder-1.3b-instruct",
            "deepseek-ai/deepseek-coder-6.7b-instruct",
            "google/codegemma-1.1-2b-GGUF",
            "google/gemma-3-27b-pt",
            "google/paligemma2-28b-pt-896",
            "google/reformer-crime-and-punishment",
            "google/reformer-enwik8",
            "google/t5-11b-ssm-nq",
            "google/t5-3b-ssm-nq",
            "google/t5-large-ssm-nq",
            "google/t5-small-ssm-nq",
            "google/t5-xl-ssm-nq",
            "google/t5-xxl-ssm-nq",
            "google/t5gemma-ml-ml-ul2-it",
            "internlm/Spatial-SSRL-7B",
            "llava-hf/llava-1.5-13b-hf",
            "llava-hf/llava-v1.6-34b-hf",
            "meta-llama/Llama-3.1-405B",
            "meta-llama/Llama-3.1-405B-Instruct",
            "meta-llama/Llama-3.1-70B",
            "meta-llama/Llama-3.1-70B-Instruct",
            "meta-llama/Llama-3.1-8B",
            "meta-llama/Llama-3.1-8B-Instruct",
            "meta-llama/Llama-3.2-11B-Vision-Instruct",
            "meta-llama/Llama-3.2-1B",
            "meta-llama/Llama-3.2-1B-Instruct",
            "meta-llama/Llama-3.2-3B",
            "meta-llama/Llama-3.2-3B-Instruct",
            "meta-llama/Llama-3.2-90B-Vision",
            "meta-llama/Llama-3.2-90B-Vision-Instruct",
            "meta-llama/Llama-3.3-70B-Instruct",
            "microsoft/MediPhi-Instruct",
            "microsoft/Phi-3-medium-4k-instruct-onnx-cpu",
            "microsoft/Phi-3.5-mini-instruct",
            "microsoft/bitnet-b1.58-2B-4T",
            "microsoft/bitnet-b1.58-2B-4T-gguf",
            "microsoft/kosmos-2.5",
            "microsoft/kosmos-2.5-chat",
            "microsoft/llava-med-v1.5-mistral-7b",
            "mistralai/Mistral-7B-Instruct-v0.2",
            "moonshotai/Kimi-Dev-72B",
            "moonshotai/Kimi-K2-Base",
            "moonshotai/Kimi-K2-Instruct",
            "moonshotai/Kimi-K2-Instruct-0905",
            "moonshotai/Kimi-K2-Thinking",
            "moonshotai/Kimi-Linear-48B-A3B-Base",
            "moonshotai/Kimi-Linear-48B-A3B-Instruct",
            "moonshotai/Moonlight-16B-A3B",
            "openbmb/MiniCPM4.1-8B",
            "tencent/DeepSeek-V3.1-Terminus-W4AFP8",
            "tencent/Hunyuan-0.5B-Pretrain",
            "zai-org/GLM-4-9B-0414",
            "zai-org/GLM-4.1V-9B-Base",
            "zai-org/GLM-4.5-Air",
            "zai-org/GLM-4.5V-FP8",
            "zai-org/GLM-4.6",
            "zai-org/GLM-4.6-FP8",
            "zai-org/GLM-Z1-32B-0414",
            "zai-org/GLM-Z1-9B-0414",
            "zai-org/Glyph",
            "zai-org/UI2Code_N",
            "zai-org/WebVIA-Agent",
            "zai-org/codegeex4-all-9b"
          ]
        }
      },
      "required": [
        "huggingfaceName"
      ]
    },
    "resources": {
      "type": "object",
      "description": "资源配置",
      "properties": {
        "gpuLimit": {
          "type": "integer",
          "description": "GPU 限制",
          "default": 1,
          "minimum": 1
        },
	"gpuMem": {
          "type": "integer",
          "description": "GPU 显存限制，单位MB, 0表示独占卡",
          "default": 0,
          "minimum": 0
        },
        "cpuRequest": {
          "type": "integer",
          "description": "CPU 请求",
          "default": 12,
          "minimum": 1
        },
        "memoryLimit": {
          "type": "string",
          "description": "内存限制",
          "default": "16Gi",
          "pattern": "^[0-9]+(\\.[0-9]+)?(Mi|Gi|Ti)$"
        },
        "shmSize": {
          "type": "string",
          "description": "共享内存大小",
          "default": "20Gi",
          "pattern": "^[0-9]+(\\.[0-9]+)?(Mi|Gi|Ti)$"
        }
      }
    },
    "workerSize": {
      "type": "integer",
      "description": "Worker 数量",
      "default": 1,
      "minimum": 1
    },
    "command": {
      "type": "string",
      "description": "自定义命令，模型路径路为 /Model/Weight/Qwen3-0.6B, LoRA 路径为 /Model/LoRA/Qwen3-0.6B (可选)  \n e.g. vllm serve --model /Model/Weight/Qwen3-0.6B ",
      "default": ""
    }
  }
}