diff --git a/openllm/openllm-deployment.yaml b/openllm/openllm-deployment.yaml
new file mode 100644
index 0000000..11b73a7
--- /dev/null
+++ b/openllm/openllm-deployment.yaml
@@ -0,0 +1,35 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: openllm-deployment
+  namespace: openllm-ns
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: openllm
+  template:
+    metadata:
+      labels:
+        app: openllm
+    spec:
+      containers:
+        - name: openllm-container
+          # NOTE(review): pin a specific version tag instead of latest for reproducible deploys
+          image: ghcr.io/bentoml/openllm:latest
+          # NOTE(review): command replaces the image ENTRYPOINT entirely — confirm the
+          # image is meant to run "start ..." directly (vs. args: ["start", ...])
+          command: ["start"]
+          # Kubernetes env references in args use $(VAR), not shell-style ${VAR}
+          args: ["$(MODEL_NAME)", "--backend", "vllm"]
+          env:
+            # Set this to desired deployment model
+            - name: MODEL_NAME
+              value: "meta-llama/Llama-2-13b-hf"
+            - name: TRUST_REMOTE_CODE  # was TRUST_REMOVE_CODE (typo — variable ignored)
+              value: "True"
+            - name: OPENLLM_DO_NOT_TRACK
+              value: "True"
+          ports:
+            - containerPort: 3000
+          resources:
+            limits:
+              nvidia.com/gpu: 2
+      # nodeSelector is a pod-spec field (sibling of containers), not a container field
+      nodeSelector:
+        kubernetes.io/os: linux
diff --git a/openllm/openllm-service.yaml b/openllm/openllm-service.yaml
new file mode 100644
index 0000000..e5fb3f2
--- /dev/null
+++ b/openllm/openllm-service.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: openllm-service
+  namespace: openllm-ns
+spec:
+  type: LoadBalancer
+  ports:
+    - port: 3000
+      targetPort: 3000
+  selector:
+    app: openllm