From a2636842846676308c574f5b2e6f2616c5f535c6 Mon Sep 17 00:00:00 2001 From: Tyler Perkins Date: Sat, 30 Mar 2024 23:03:01 -0400 Subject: [PATCH] Upgrade version and use gpt2 --- vllm/vllm-deployment.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/vllm-deployment.yaml b/vllm/vllm-deployment.yaml index d5461cf..3c93a25 100644 --- a/vllm/vllm-deployment.yaml +++ b/vllm/vllm-deployment.yaml @@ -16,7 +16,7 @@ spec: runtimeClassName: nvidia containers: - name: vllm-inference-server - image: vllm/vllm-openai:v0.3.3 + image: vllm/vllm-openai:latest imagePullPolicy: IfNotPresent resources: @@ -33,7 +33,7 @@ spec: #args: #- while true; do sleep 2600; done command: ["python3", "-m", "vllm.entrypoints.openai.api_server"] - args: ["--model=mistralai/Mistral-7B-v0.1", + args: ["--model=openai-community/gpt2", "--gpu-memory-utilization=0.95", "--disable-log-requests", "--trust-remote-code",