From 754584d291fda5080b37769b19ac8256bd72f1a9 Mon Sep 17 00:00:00 2001
From: Tyler Perkins
Date: Sat, 30 Mar 2024 16:29:24 -0400
Subject: [PATCH] Add openllm

---
Reviewer note: the manifests below were revised during review, so the
abbreviated blob hashes on the `index` lines predate the revised content.

 openllm/openllm-deployment.yaml | 42 +++++++++++++++++++++++++++++++++
 openllm/openllm-service.yaml    | 14 +++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 openllm/openllm-deployment.yaml
 create mode 100644 openllm/openllm-service.yaml

diff --git a/openllm/openllm-deployment.yaml b/openllm/openllm-deployment.yaml
new file mode 100644
index 0000000..11b73a7
--- /dev/null
+++ b/openllm/openllm-deployment.yaml
@@ -0,0 +1,42 @@
+# OpenLLM server Deployment: one replica serving MODEL_NAME on the vLLM backend.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: openllm-deployment
+  namespace: openllm-ns
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: openllm
+  template:
+    metadata:
+      labels:
+        app: openllm
+    spec:
+      containers:
+        - name: openllm-container
+          # NOTE(review): no tag is specified; consider pinning a release tag
+          # so rollouts are reproducible.
+          image: ghcr.io/bentoml/openllm
+          # Only args are set so the image's own entrypoint still runs
+          # (assumed to be the openllm CLI, as in its documented docker usage;
+          # confirm against the image). Kubernetes expands $(VAR) references
+          # in args from the env entries below; ${VAR} would stay literal.
+          args: ["start", "$(MODEL_NAME)", "--backend", "vllm"]
+          env:
+            # Set this to the desired deployment model.
+            - name: MODEL_NAME
+              value: "meta-llama/Llama-2-13b-hf"
+            - name: TRUST_REMOTE_CODE
+              value: "True"
+            - name: OPENLLM_DO_NOT_TRACK
+              value: "True"
+          ports:
+            - containerPort: 3000
+          resources:
+            limits:
+              # Two GPUs per pod (nvidia.com/gpu extended resource).
+              nvidia.com/gpu: 2
+      nodeSelector:
+        kubernetes.io/os: linux
diff --git a/openllm/openllm-service.yaml b/openllm/openllm-service.yaml
new file mode 100644
index 0000000..e5fb3f2
--- /dev/null
+++ b/openllm/openllm-service.yaml
@@ -0,0 +1,14 @@
+# Exposes the pods labelled app: openllm on port 3000 via a LoadBalancer.
+apiVersion: v1
+kind: Service
+metadata:
+  name: openllm-service
+  namespace: openllm-ns
+spec:
+  type: LoadBalancer
+  ports:
+    # targetPort matches containerPort in openllm-deployment.yaml.
+    - port: 3000
+      targetPort: 3000
+  selector:
+    app: openllm