fix: debug LWS (LeaderWorkerSet) without InfiniBand, and set --distributed-executor-backend=ray as default
All checks were successful
Publish Helm Charts / helm-publish (push) Successful in 8s

This commit is contained in:
Ivan087
2025-12-04 09:47:43 +08:00
parent 51163f0442
commit 97c5d559e3
2 changed files with 91 additions and 30 deletions

View File

@@ -27,8 +27,6 @@ model:
huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct" # 用户只需输入这个
localMountPath: "/Model" # PVC 固定挂载路径
huggingfaceToken: "<your-hf-token>"
download: # 启用自动下载
image: "docker.io/vllm/vllm-openai:latest" # 包含 huggingface-cli 的镜像
# 功能选择
@@ -38,6 +36,15 @@ resources:
memoryLimit: "16Gi"
shmSize: "20Gi"
# RDMA 配置部分
rdma:
enabled: false # 开关:默认关闭,防止在无 RDMA 节点报错
interface: eth0 # NCCL/GLOO 通信使用的网卡名称 (有 RDMA 时可能是 ib0 或 bond0)
resourceName: "rdma/rdma_shared_device_a" # RDMA 资源名称 (取决于你的 k8s 插件)
resourceCount: 5 # 每个 Pod 需要的 RDMA 设备数量
hca: "mlx5_0:1" # 指定的 HCA 设备 (或者使用 ^mlx5 进行前缀匹配)
gidIndex: "0" # RoCEv2 通常需要指定 GID
svc:
type: LoadBalancer
port: 80