# Default values for vllm-app.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Secrets used to pull images from a private registry. More information:
# https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
imagePullSecrets: []
# Image pull policy.
# NOTE(review): presumably applied to every container the chart creates — confirm in the templates.
imagePullPolicy: IfNotPresent
# This is to override the chart name.
nameOverride: ""
# NOTE(review): presumably overrides the generated full resource name, as in the standard
# Helm scaffold — confirm in the templates.
fullnameOverride: ""

# Service account settings. More information:
# https://kubernetes.io/docs/concepts/security/service-accounts/
serviceAccount:
  # Specifies whether a service account should be created.
  create: true
  # Whether to automatically mount the ServiceAccount's API credentials.
  automount: true
  # Annotations to add to the service account.
  annotations: {}
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template.
  name: ""
  
  
# Model configuration
model:
  # Hugging Face model name; this is the only value the user needs to enter.
  huggingfaceName: "Qwen/Qwen2.5-0.5B-Instruct"
  # Fixed path at which the PVC is mounted.
  localMountPath: "/Model"
  # Hugging Face access token. The default below is only a placeholder.
  # NOTE(review): supply the real token at install time (e.g. --set or a private values
  # file) rather than committing it here.
  huggingfaceToken: "<your-hf-token>"

# Feature selection

# Resource values passed to the chart templates.
# NOTE(review): how each value is mapped onto container requests/limits is not visible
# in this file — confirm against the templates.
resources:
  # Presumably the number of GPUs per pod — confirm.
  gpuLimit: 1
  # NOTE(review): unit and the meaning of 0 are not visible here — confirm.
  gpuMem: 0
  # Presumably the CPU request (in cores) per pod — confirm.
  cpuRequest: 12
  # Presumably the memory limit per pod — confirm.
  memoryLimit: "16Gi"
  # Presumably the size of the shared-memory (/dev/shm) volume — confirm.
  shmSize: "20Gi"

# RDMA configuration
rdma:
  # Switch: off by default so that nodes without RDMA do not report errors.
  enabled: false
  # Name of the network interface used for NCCL/GLOO communication
  # (with RDMA this may be ib0 or bond0).
  interface: eth0
  # RDMA resource name (depends on your Kubernetes device plugin).
  resourceName: "rdma/rdma_shared_device_a"
  # Number of RDMA devices each Pod needs.
  resourceCount: 5
  # HCA device to use (or use ^mlx5 for prefix matching).
  # NOTE(review): if this value is passed to NCCL_IB_HCA, a leading ^ means "exclude"
  # there and a plain "mlx5" is the prefix match — confirm which form is intended.
  hca: "mlx5_0:1"
  # RoCEv2 usually requires the GID index to be specified.
  gidIndex: "0"
    
# Service settings.
# NOTE(review): presumably rendered into a Kubernetes Service — confirm in the templates.
svc:
  type: LoadBalancer
  # Presumably the port the Service exposes — confirm.
  port: 80
  # Presumably the container port that traffic is forwarded to — confirm.
  targetPort: 8000
  # nodePort: 30080
# vLLM application configuration
vllm:
  # Container image for vLLM.
  # NOTE(review): this is a mutable ":latest" tag while imagePullPolicy in this file is
  # IfNotPresent; if that policy applies to this image, nodes that already cached it will
  # not pull newer builds — consider pinning a version.
  image: "docker.io/vllm/vllm-openai:latest"

# NOTE(review): top-level key (not nested under vllm); empty by default. Presumably a
# custom command override for the serving container — confirm in the templates.
command: ""

# llama application configuration
llama:
  # Container image for the llama option.
  image: "docker.io/library/one-click:v1"

# lmdeploy application configuration
lmdeploy:
  # Container image for lmdeploy.
  image: "docker.io/openmmlab/lmdeploy:latest-cu12"


# NFS PV/PVC configuration
nfs:
  # Address of the NFS server.
  server: "10.6.80.11"
  # Path on the NFS server.
  path: "/volume1/Dataset/PVStore/lab-data-model-pvc-c0beeab1-6dd5-4c6a-bd2c-6ce9e114c25e/Weight"
  # storageClass: "local-path"
  # NOTE(review): pvSize / pvcSize are presumably the PersistentVolume capacity and the
  # PersistentVolumeClaim request respectively — confirm in the templates.
  pvSize: "500Gi"
  pvcSize: "50Gi"

# LeaderWorkerSet configuration
# NOTE(review): presumably replicaCount is the number of LeaderWorkerSet replicas (groups)
# and workerSize the number of pods per group — confirm in the templates.
replicaCount: 1
workerSize: 1

# Scheduling values; each is empty by default.
# NOTE(review): presumably passed through to the pod spec — confirm in the templates.
nodeSelector: {}

tolerations: []

affinity: {}