# Listing metadata from the original file viewer: 54 lines, 963 B, YAML
---
# Application name. The anchor lets later sections reuse this value through
# the *application_name alias (it is used as release_name in this file).
application_name: &application_name vllm

# "distributed" deployment profile (workerSize: 2).
# NOTE(review): the nesting below was reconstructed from a listing whose
# indentation had been stripped; confirm it against the consumer's schema.
distributed:
  method: helm
  # Alias of the application_name anchor defined at the top of the file.
  release_name: *application_name
  chart: vllm-app
  # presumably forwarded to the chart as value overrides; TODO confirm
  sets:
    app: vllm
    model:
      huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
    resources:
      gpuLimit: 1
      cpuRequest: 8
      memoryLimit: "16Gi"
      shmSize: "15Gi"
    workerSize: 2
    nodeSelector: {}
  svc:
    svc_type: NodePort
    protocol: http
    # Quoted so the address is always read as a string.
    hostname: "10.6.14.123"
    port: 30080
    # No explicit URL is set; presumably derived from protocol/hostname/port
    # by the consumer; TODO confirm.
    url: null
    paths:
      docs_path: /docs
      redoc_path: /redoc
  pod:
    name: infer-0

# "monolithic" deployment profile (workerSize: 1).
# NOTE(review): the nesting below was reconstructed from a listing whose
# indentation had been stripped; confirm it against the consumer's schema.
monolithic:
  method: helm
  # Alias of the application_name anchor defined at the top of the file.
  release_name: *application_name
  chart: vllm-app
  # presumably forwarded to the chart as value overrides; TODO confirm
  sets:
    app: vllm
    model:
      huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
    resources:
      gpuLimit: 1
      cpuRequest: 8
      memoryLimit: "16Gi"
      shmSize: "15Gi"
    workerSize: 1
    nodeSelector: {}
  svc:
    svc_type: NodePort
    protocol: http
    # Quoted so the address is always read as a string.
    hostname: "10.6.14.123"
    port: 30080
    # No explicit URL is set; presumably derived from protocol/hostname/port
    # by the consumer; TODO confirm.
    url: null
  pod:
    name: vllm