first commit
This commit is contained in:
58
apps/vllm-server/base/deployment.yaml
Normal file
58
apps/vllm-server/base/deployment.yaml
Normal file
@@ -0,0 +1,58 @@
|
||||
---
# Deployment for the vLLM OpenAI-compatible model server (single replica).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm-server
  labels:
    app.kubernetes.io/name: vllm-server
    app.kubernetes.io/component: model-server
    app.kubernetes.io/part-of: ocdp-workload
  # Explicit empty map (rather than a bare key, which would parse as null).
  annotations: {}
spec:
  replicas: 1
  # Must match the pod template labels below; the Service in service.yaml
  # selects pods on the same app.kubernetes.io/name label.
  selector:
    matchLabels:
      app.kubernetes.io/name: vllm-server
  template:
    metadata:
      labels:
        app.kubernetes.io/name: vllm-server
        app.kubernetes.io/component: model-server
        app.kubernetes.io/part-of: ocdp-workload
    spec:
      containers:
        - name: vllm
          # NOTE(review): `latest` is a mutable tag and IfNotPresent reuses
          # whatever build is already cached on a node, so different nodes
          # can end up running different builds. Consider pinning a version
          # tag or digest.
          image: vllm/vllm-openai:latest
          imagePullPolicy: IfNotPresent
          # Container args are strings; every entry is quoted so that none
          # (the port in particular) is retyped by the YAML parser.
          args:
            - '--host'
            - '0.0.0.0'
            - '--port'
            - '8000'
            - '--model'
            - 'Qwen/Qwen2.5-7B-Instruct'
            # Model name exposed by the API instead of the Hugging Face id.
            - '--served-model-name'
            - 'default'
          env:
            # Hugging Face token read from the `vllm-secrets` Secret.
            # `optional: true` lets the pod start when the Secret or the
            # key is absent.
            - name: HF_TOKEN
              valueFrom:
                secretKeyRef:
                  name: vllm-secrets
                  key: hfToken
                  optional: true
          ports:
            # Named port: referenced by the readiness probe below and by
            # the Service's targetPort.
            - name: http
              containerPort: 8000
          # The pod is marked Ready only after GET /health succeeds on the
          # http port; first check 20s after container start, then every 10s.
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 20
            periodSeconds: 10
          # NOTE(review): only CPU and memory are declared here; no
          # accelerator resource (e.g. nvidia.com/gpu) is requested in this
          # base. Presumably an overlay adds it; confirm.
          resources:
            requests:
              cpu: '2'
              memory: 12Gi
            limits:
              cpu: '4'
              memory: 24Gi
|
||||
|
||||
6
apps/vllm-server/base/kustomization.yaml
Normal file
6
apps/vllm-server/base/kustomization.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
---
# Kustomize base for the vLLM server: bundles the Deployment and the
# Service that sit next to this file.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - deployment.yaml
  - service.yaml
|
||||
|
||||
16
apps/vllm-server/base/service.yaml
Normal file
16
apps/vllm-server/base/service.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
---
# Service exposing the vLLM server pods on port 8000.
apiVersion: v1
kind: Service
metadata:
  name: vllm-server
  labels:
    app.kubernetes.io/name: vllm-server
    app.kubernetes.io/component: model-server
    app.kubernetes.io/part-of: ocdp-workload
spec:
  # Routes to every pod carrying this label (set on the Deployment's pod
  # template in deployment.yaml).
  selector:
    app.kubernetes.io/name: vllm-server
  ports:
    # targetPort is given by name, so it resolves to whichever container
    # port the pod declares as `http`.
    - name: http
      port: 8000
      targetPort: http
|
||||
|
||||
Reference in New Issue
Block a user