---
# Deployment profiles for the "webchat" application.
#
# NOTE(review): this file was reconstructed from a whitespace-collapsed
# source. Key order and values are preserved exactly; the nesting depth of
# `workerSize`, `nodeSelector` (placed under `sets`) and `paths` (placed
# under `svc`) is inferred from key order — confirm against the consumer's
# schema.

# Shared name; reused below via alias for the Helm release and pod names.
application_name: &application_name webchat

# Profile using the "llama" app with two workers.
distributed:
  method: helm
  release_name: *application_name
  chart: vllm-app
  # Values passed to the chart (presumably as Helm `--set` overrides —
  # TODO confirm).
  sets:
    app: llama
    model:
      huggingfaceName: "Qwen/Qwen2-VL-2B-Instruct"
    resources:
      gpuLimit: 1
      cpuRequest: 8
      memoryLimit: "8Gi"
      shmSize: "15Gi"
    workerSize: 2
    nodeSelector: {}
  svc:
    svc_type: NodePort
    protocol: http
    # Quoted so the dotted value is unambiguously a string.
    hostname: "10.6.14.123"
    port: 30081
    # Explicit null; presumably filled in by the consumer — TODO confirm.
    url: null
    paths:
      docs_path: /docs
      redoc_path: /redoc
  pod:
    name: *application_name

# Profile using the "vllm" app with a single worker.
monolithic:
  method: helm
  release_name: *application_name
  chart: vllm-app
  sets:
    app: vllm
    model:
      huggingfaceName: "Qwen/Qwen2.5-32B-Instruct"
    resources:
      gpuLimit: 1
      cpuRequest: 12
      # NOTE(review): same memoryLimit as the `distributed` profile even
      # though the model name suggests a much larger model — confirm sizing.
      memoryLimit: "8Gi"
      shmSize: "15Gi"
    workerSize: 1
    nodeSelector: {}
  svc:
    svc_type: NodePort
    protocol: http
    # Quoted so the dotted value is unambiguously a string.
    hostname: "10.6.14.123"
    port: 30080
    # Explicit null; presumably filled in by the consumer — TODO confirm.
    url: null
  pod:
    name: *application_name