Files
ocdp-workload-manifests/apps/vllm-server/base/deployment.yaml
2026-05-28 07:21:41 +00:00

59 lines
1.4 KiB
YAML

---
# Deployment for the vllm-server workload: a single replica running one
# container ("vllm") from the vllm/vllm-openai image, listening on port 8000.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm-server
  labels:
    app.kubernetes.io/name: vllm-server
    app.kubernetes.io/component: model-server
    app.kubernetes.io/part-of: ocdp-workload
  annotations: {}
spec:
  replicas: 1
  selector:
    # The selector must match the pod template labels below; only the
    # app.kubernetes.io/name label is used for selection.
    matchLabels:
      app.kubernetes.io/name: vllm-server
  template:
    metadata:
      labels:
        app.kubernetes.io/name: vllm-server
        app.kubernetes.io/component: model-server
        app.kubernetes.io/part-of: ocdp-workload
    spec:
      containers:
        - name: vllm
          # NOTE(review): a mutable ":latest" tag combined with IfNotPresent
          # means each node keeps whichever image it cached first. Consider
          # pinning a version tag or digest - confirm the intended version.
          image: vllm/vllm-openai:latest
          imagePullPolicy: IfNotPresent
          # Flags passed to the image's entrypoint. The --port value must stay
          # in sync with containerPort below.
          args:
            - --host
            - 0.0.0.0
            - --port
            - "8000"
            - --model
            - Qwen/Qwen2.5-7B-Instruct
            - --served-model-name
            - default
          env:
            # Presumably a Hugging Face access token - verify against the
            # vllm-secrets Secret. "optional: true" lets the pod start even
            # when the Secret or its hfToken key does not exist.
            - name: HF_TOKEN
              valueFrom:
                secretKeyRef:
                  name: vllm-secrets
                  key: hfToken
                  optional: true
          ports:
            - name: http
              containerPort: 8000
          # The pod is marked Ready only once GET /health on the named "http"
          # port succeeds; first check after 20s, then every 10s.
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 20
            periodSeconds: 10
          # NOTE(review): only CPU and memory are declared here; no accelerator
          # resource is requested in this manifest - confirm whether an overlay
          # is expected to add one.
          resources:
            requests:
              cpu: "2"
              memory: 12Gi
            limits:
              cpu: "4"
              memory: 24Gi