diff --git a/openllm/openllm-deployment.yaml b/openllm/openllm-deployment.yaml
new file mode 100644
index 0000000..11b73a7
--- /dev/null
+++ b/openllm/openllm-deployment.yaml
@@ -0,0 +1,35 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: openllm-deployment
+  namespace: openllm-ns
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: openllm
+  template:
+    metadata:
+      labels:
+        app: openllm
+    spec:
+      containers:
+        - name: openllm-container
+          # NOTE(review): pin a specific version tag instead of latest for reproducible deploys
+          image: ghcr.io/bentoml/openllm:latest
+          # NOTE(review): command replaces the image ENTRYPOINT entirely — confirm the
+          # image is meant to run "start ..." directly (vs. args: ["start", ...])
+          command: ["start"]
+          # Kubernetes env references in args use $(VAR), not shell-style ${VAR}
+          args: ["$(MODEL_NAME)", "--backend", "vllm"]
+          env:
+            # Set this to desired deployment model
+            - name: MODEL_NAME
+              value: "meta-llama/Llama-2-13b-hf"
+            - name: TRUST_REMOTE_CODE  # was TRUST_REMOVE_CODE (typo — variable ignored)
+              value: "True"
+            - name: OPENLLM_DO_NOT_TRACK
+              value: "True"
+          ports:
+            - containerPort: 3000
+          resources:
+            limits:
+              nvidia.com/gpu: 2
+      # nodeSelector is a pod-spec field (sibling of containers), not a container field
+      nodeSelector:
+        kubernetes.io/os: linux
diff --git a/openllm/openllm-service.yaml b/openllm/openllm-service.yaml
new file mode 100644
index 0000000..e5fb3f2
--- /dev/null
+++ b/openllm/openllm-service.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: openllm-service
+  namespace: openllm-ns
+spec:
+  type: LoadBalancer
+  ports:
+    - port: 3000
+      targetPort: 3000
+  selector:
+    app: openllm