Modify config in ICC and support custom command in vllm and lmdeploy
@@ -56,11 +56,20 @@ spec:
             - name: RAY_DEDUP_LOGS
               value: "0"
           command:
-            - sh
-            - -c
-            - "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
-              echo 'Using single node ------------------------------------------';
-              python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
+            - /bin/sh
+            - -c
+          args:
+            - |
+              {{ tpl (.Values.command | default `
+              MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}');
+              MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
+              python3 -m vllm.entrypoints.openai.api_server \
+                --port 8080 \
+                --model $MODEL_PATH \
+                --tensor-parallel-size {{ .Values.resources.gpuLimit }} \
+                --pipeline_parallel_size {{ .Values.workerSize }} \
+                --trust_remote_code
+              `) . }}
           resources:
             limits:
               nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
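With this change the launch command is no longer hard-coded: `.Values.command` overrides it per release, and the template falls back to the default vLLM launch line when it is unset. A minimal sketch of such an override; the file name, release name, chart path, and the extra flag are illustrative assumptions, not part of this commit:

# my-values.yaml (hypothetical): replaces the default command rendered above.
# Templated values inside the string still render, since the args template
# pipes the final string through tpl.
command: |
  MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}');
  python3 -m vllm.entrypoints.openai.api_server \
    --port 8080 \
    --model '{{ .Values.model.localMountPath }}/'$MODEL_NAME \
    --max-model-len 8192

Applied with, e.g., `helm upgrade --install vllm ./vllm-chart -f my-values.yaml` (chart path assumed).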