From 28a5a521ee878490adc98c37e49cd976122dfe14 Mon Sep 17 00:00:00 2001 From: Tyler Perkins Date: Sat, 30 Mar 2024 22:39:54 -0400 Subject: [PATCH] Add nvidia runtime --- vllm/vllm-deployment.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/vllm-deployment.yaml b/vllm/vllm-deployment.yaml index 9cb4da7..0237523 100644 --- a/vllm/vllm-deployment.yaml +++ b/vllm/vllm-deployment.yaml @@ -13,6 +13,7 @@ spec: labels: app: vllm-inference-server spec: + runtimeClassName: nvidia containers: - name: vllm-inference-server image: vllm/vllm-openai:v0.3.3