Modify config in ICC and support a configurable command in vllm and lmdeploy

ivanwu
2025-11-10 17:50:45 +08:00
parent a158e24d3f
commit c7bd7a6e62
4 changed files with 361 additions and 10 deletions


@@ -56,11 +56,20 @@ spec:
           - name: RAY_DEDUP_LOGS
             value: "0"
           command:
-          - sh
-          - -c
-          - "MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}'); MODEL_PATH='{{ .Values.model.localMountPath }}/'$MODEL_NAME;
-            echo 'Using single node ------------------------------------------';
-            python3 -m vllm.entrypoints.openai.api_server --port 8080 --model $MODEL_PATH --tensor-parallel-size {{ .Values.resources.gpuLimit }} --pipeline_parallel_size {{ .Values.workerSize }} --trust_remote_code"
+          - /bin/sh
+          - -c
+          args:
+          - |
+            {{- if .Values.command }}{{ .Values.command | nindent 12 }}{{ else }}
+            MODEL_NAME=$(basename '{{ .Values.model.huggingfaceName }}');
+            MODEL_PATH="{{ .Values.model.localMountPath }}/$MODEL_NAME";
+            python3 -m vllm.entrypoints.openai.api_server \
+              --port 8080 \
+              --model $MODEL_PATH \
+              --tensor-parallel-size {{ .Values.resources.gpuLimit }} \
+              --pipeline_parallel_size {{ .Values.workerSize }} \
+              --trust_remote_code
+            {{- end }}
           resources:
             limits:
               nvidia.com/gpu: "{{ .Values.resources.gpuLimit }}"
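
With this change the container entrypoint is read from .Values.command, falling back to the vLLM server invocation above when the value is unset; this is what lets the same chart serve lmdeploy, since only a value override is needed. A minimal sketch of such an override in values.yaml, assuming a hypothetical mounted model directory /models/internlm2-chat-7b and an image that ships the lmdeploy CLI (the port matches the 8080 used by the default vLLM command):

    # hypothetical model path; point this at your localMountPath layout
    command: |
      lmdeploy serve api_server /models/internlm2-chat-7b --server-port 8080 --tp 1

The override is inserted verbatim rather than rendered with tpl, so chart values such as {{ .Values.model.localMountPath }} are not expanded inside it; paths in the override must be spelled out literally.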