apiVersion: apps/v1
kind: Deployment
metadata:
  name: openllm-deployment
  namespace: openllm-ns
spec:
  replicas: 1
  selector:
    matchLabels:
      app: openllm
  template:
    metadata:
      labels:
        app: openllm
    spec:
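      # Assumes a RuntimeClass named "nvidia" exists on the cluster
      # (typically created by the NVIDIA GPU Operator / container toolkit).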
      runtimeClassName: nvidia
      containers:
        - name: openllm-container
          image: ghcr.io/bentoml/openllm
          # Set this to the desired deployment model
          args: ["start", "HuggingFaceH4/zephyr-7b-beta", "--backend", "vllm"]
          env:
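            # Maps to Hugging Face trust_remote_code so models that ship custom code can load.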
            - name: TRUST_REMOTE_CODE
value: "True"
|
|
|
|
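            # Opt out of OpenLLM's anonymous usage tracking.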
            - name: OPENLLM_DO_NOT_TRACK
              value: "True"
          ports:
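            # OpenLLM serves its HTTP API on port 3000 by default.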
            - containerPort: 3000
          resources:
            limits:
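              # Request one NVIDIA GPU via the device plugin's nvidia.com/gpu extended resource.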
              nvidia.com/gpu: 1
      nodeSelector:
        kubernetes.io/os: linux
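
# Example usage, assuming this manifest is saved as openllm-deployment.yaml and
# the openllm-ns namespace already exists:
#   kubectl apply -f openllm-deployment.yaml
#   kubectl -n openllm-ns port-forward deployment/openllm-deployment 3000:3000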