Gluttony-Cluster/openllm/openllm-deployment.yaml.off

37 lines
852 B
Plaintext
Raw Normal View History

2024-03-30 20:29:24 +00:00
---
# Deployment that runs a single OpenLLM server pod on an NVIDIA GPU node.
# The model to serve is selected through the MODEL_NAME environment variable
# declared on the container below.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: openllm-deployment
  namespace: openllm-ns
spec:
  replicas: 1
  selector:
    matchLabels:
      app: openllm
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above.
        app: openllm
    spec:
      # Run the pod under the "nvidia" RuntimeClass.
      runtimeClassName: nvidia
      containers:
        - name: openllm-container
          # NOTE(review): no tag is given, so this resolves to ":latest";
          # consider pinning a specific version for reproducible rollouts.
          image: ghcr.io/bentoml/openllm
          # "start" is passed via args rather than command: in Kubernetes,
          # `command` replaces the image ENTRYPOINT, so `command: ["start"]`
          # would try to exec a binary literally named "start".
          # NOTE(review): assumes the image ENTRYPOINT is the openllm CLI;
          # confirm against the image.
          # Kubernetes expands $(VAR) in args; shell-style ${VAR} is passed
          # through as a literal string.
          args:
            - start
            - $(MODEL_NAME)
            - --backend
            - vllm
          env:
            # Set this to desired deployment model
            - name: MODEL_NAME
              value: "meta-llama/Llama-2-13b-hf"
            - name: TRUST_REMOTE_CODE
              value: "True"
            - name: OPENLLM_DO_NOT_TRACK
              value: "True"
          ports:
            - containerPort: 3000
          resources:
            limits:
              # Request one GPU via the nvidia.com/gpu extended resource.
              nvidia.com/gpu: 1
      nodeSelector:
        kubernetes.io/os: linux