Gluttony-Cluster/openllm/openllm-deployment.yaml
Tyler Perkins c4a812073d
All checks were successful
continuous-integration/drone/push Build is passing
Upgrade to nvidia runtime
2024-03-30 16:39:58 -04:00

37 lines
852 B
YAML

---
# Deployment that runs a single OpenLLM server pod (vLLM backend) with one
# NVIDIA GPU, using the cluster's "nvidia" RuntimeClass.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: openllm-deployment
  namespace: openllm-ns
spec:
  replicas: 1
  selector:
    matchLabels:
      app: openllm
  template:
    metadata:
      labels:
        app: openllm
    spec:
      # Pods are run with the RuntimeClass named "nvidia"; it must already
      # exist in the cluster.
      runtimeClassName: nvidia
      containers:
        - name: openllm-container
          # NOTE(review): no tag is given, so this resolves to ":latest".
          # Consider pinning a specific tag or digest for reproducible rollouts.
          image: ghcr.io/bentoml/openllm
          # Only `args` is set so the image's own ENTRYPOINT is preserved
          # (`command` would replace it). The subcommand `start` is therefore
          # passed as the first argument.
          # NOTE(review): assumes the image entrypoint is the `openllm` CLI,
          # as in its documented `docker run ... openllm start <model>` usage.
          #
          # Kubernetes expands `$(VAR)` (not shell-style `${VAR}`) from the
          # container's `env` entries below.
          args: ["start", "$(MODEL_NAME)", "--backend", "vllm"]
          env:
            # Set this to the desired deployment model.
            - name: MODEL_NAME
              value: "meta-llama/Llama-2-13b-hf"
            # Values are quoted so they stay strings; env values must be
            # strings in the Kubernetes API.
            - name: TRUST_REMOTE_CODE
              value: "True"
            - name: OPENLLM_DO_NOT_TRACK
              value: "True"
          ports:
            - containerPort: 3000
          resources:
            limits:
              # Extended resources such as GPUs are requested via limits.
              nvidia.com/gpu: 1
      nodeSelector:
        kubernetes.io/os: linux