first backup of charts
This commit is contained in:
53
webchat/metadata.yaml
Normal file
53
webchat/metadata.yaml
Normal file
@@ -0,0 +1,53 @@
|
||||
|
||||
application_name: &application_name webchat
|
||||
|
||||
distributed:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: vllm-app
|
||||
sets:
|
||||
app: llama
|
||||
model:
|
||||
huggingfaceName: "Qwen/Qwen2-VL-2B-Instruct"
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
cpuRequest: 8
|
||||
memoryLimit: "8Gi"
|
||||
shmSize: "15Gi"
|
||||
workerSize: 2
|
||||
nodeSelector: {}
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
port: 30081
|
||||
url: ~
|
||||
paths:
|
||||
docs_path: /docs
|
||||
redoc_path: /redoc
|
||||
pod:
|
||||
name: *application_name
|
||||
monolithic:
|
||||
method: helm
|
||||
release_name: *application_name
|
||||
chart: vllm-app
|
||||
sets:
|
||||
app: vllm
|
||||
model:
|
||||
huggingfaceName: "Qwen/Qwen2.5-32B-Instruct"
|
||||
resources:
|
||||
gpuLimit: 1
|
||||
cpuRequest: 12
|
||||
memoryLimit: "8Gi"
|
||||
shmSize: "15Gi"
|
||||
workerSize: 1
|
||||
nodeSelector: {}
|
||||
svc:
|
||||
svc_type: NodePort
|
||||
protocol: http
|
||||
hostname: 10.6.14.123
|
||||
port: 30080
|
||||
url: ~
|
||||
pod:
|
||||
name: *application_name
|
||||
|
||||
Reference in New Issue
Block a user