application_name: &application_name vllm

distributed:
  method: helm
  release_name: *application_name
  chart: vllm-app
  sets:
    app: vllm
    model:
      huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
    resources:
      gpuLimit: 1
      cpuRequest: 8
      memoryLimit: "16Gi"
      shmSize: "15Gi"
    workerSize: 2
    nodeSelector: {}
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    port: 30080
    url: ~
    paths:
      docs_path: /docs
      redoc_path: /redoc
  pod:
    name: infer-0

monolithic:
  method: helm
  release_name: *application_name
  chart: vllm-app
  sets:
    app: vllm
    model:
      huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
    resources:
      gpuLimit: 1
      cpuRequest: 8
      memoryLimit: "16Gi"
      shmSize: "15Gi"
    workerSize: 1
    nodeSelector: {}
  svc:
    svc_type: NodePort
    protocol: http
    hostname: 10.6.14.123
    port: 30080
    url: ~
  pod:
    name: vllm
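
# Note (assumed usage, not defined in this file): with `method: helm`, the values under
# `sets` are presumably passed to the vllm-app chart as `--set` overrides. A rough sketch
# of the equivalent command for the `distributed` profile might look like:
#
#   helm upgrade --install vllm vllm-app \
#     --set app=vllm \
#     --set model.huggingfaceName=Qwen/Qwen2.5-0.5B-Instruct \
#     --set resources.gpuLimit=1 \
#     --set resources.cpuRequest=8 \
#     --set resources.memoryLimit=16Gi \
#     --set resources.shmSize=15Gi \
#     --set workerSize=2
#
# The exact key-to-flag mapping depends on the chart's values.yaml and on the tooling
# that consumes this file; the `svc` and `pod` blocks appear to describe the deployed
# endpoint (NodePort at hostname:port, plus /docs and /redoc paths) rather than chart values.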