From ffeadbebff46e34d99754f93c02994363b703a31 Mon Sep 17 00:00:00 2001 From: Tyler Perkins Date: Sat, 30 Mar 2024 22:45:48 -0400 Subject: [PATCH] Update dtype param and raise GPU limit to 2 --- vllm/vllm-deployment.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/vllm-deployment.yaml b/vllm/vllm-deployment.yaml index f48587b..42cbc9a 100644 --- a/vllm/vllm-deployment.yaml +++ b/vllm/vllm-deployment.yaml @@ -21,7 +21,7 @@ spec: resources: limits: - nvidia.com/gpu: 1 + nvidia.com/gpu: 2 env: - name: HUGGING_FACE_HUB_TOKEN value: "" @@ -38,6 +38,7 @@ spec: "--disable-log-requests", "--trust-remote-code", "--port=8000", + "--dtype=half", "--tensor-parallel-size=1"] ports: - containerPort: 8000