# Patch: run the vLLM inference pod under the "nvidia" RuntimeClass by adding
# `runtimeClassName: nvidia` to the pod spec, just above `containers:`.
# NOTE(review): this patch had lost its line breaks and leading whitespace; the
# YAML indentation below is reconstructed from the usual Deployment layout.
# Confirm it against vllm/vllm-deployment.yaml before applying.
# NOTE(review): assumes a RuntimeClass named "nvidia" exists in the target
# cluster - confirm.
diff --git a/vllm/vllm-deployment.yaml b/vllm/vllm-deployment.yaml
index 9cb4da7..0237523 100644
--- a/vllm/vllm-deployment.yaml
+++ b/vllm/vllm-deployment.yaml
@@ -13,6 +13,7 @@ spec:
       labels:
         app: vllm-inference-server
     spec:
+      runtimeClassName: nvidia
       containers:
         - name: vllm-inference-server
           image: vllm/vllm-openai:v0.3.3