Gluttony-Cluster/openllm/openllm-deployment.yaml

34 lines
777 B
YAML
Raw Normal View History

2024-03-30 20:29:24 +00:00
# Deployment running a single OpenLLM server replica on an NVIDIA GPU.
# Serves the model named in the container `args` on container port 3000.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: openllm-deployment
  namespace: openllm-ns
spec:
  replicas: 1
  selector:
    matchLabels:
      app: openllm
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above.
        app: openllm
    spec:
      # Run the pod under the "nvidia" RuntimeClass.
      runtimeClassName: nvidia
      containers:
        - name: openllm-container
          # NOTE(review): no tag or digest, so this resolves to ":latest";
          # consider pinning a specific version for reproducible rollouts.
          image: ghcr.io/bentoml/openllm
          # Set this to desired deployment model
          args: ["start", "HuggingFaceH4/zephyr-7b-beta", "--backend", "vllm"]
          env:
            # Variable name is TRUST_REMOTE_CODE ("remote", not "remove").
            # Values are quoted so they reach the container as strings.
            - name: TRUST_REMOTE_CODE
              value: "True"
            - name: OPENLLM_DO_NOT_TRACK
              value: "True"
          ports:
            - containerPort: 3000
          resources:
            limits:
              # Request exactly one GPU via the nvidia.com/gpu resource.
              nvidia.com/gpu: 1
      nodeSelector:
        kubernetes.io/os: linux