# Patch for vllm/vllm-deployment.yaml: serve openai-community/gpt2 instead of
# mistralai/Mistral-7B-v0.1 and move the vllm/vllm-openai image from v0.3.3
# to the "latest" tag.
#
# Review fix: the original change kept imagePullPolicy: IfNotPresent next to
# the mutable "latest" tag, so a node that already has a "latest" image cached
# would never pull a newer one and pods on different nodes may end up running
# different vLLM builds. The pull policy is now Always so the tag is resolved
# again on every pod start.
# TODO(review): prefer pinning a tested release tag or image digest over
# "latest" once a known-good version has been chosen.
# NOTE(review): --trust-remote-code is an unchanged context line here; it is
# presumably not needed for gpt2 -- confirm and drop it in a follow-up.
# NOTE(review): leading whitespace was lost in the reviewed copy of this
# patch. The indentation below is reconstructed and must be checked against
# the target file before applying. The post-image hash on the index line is
# carried over from the original patch.
diff --git a/vllm/vllm-deployment.yaml b/vllm/vllm-deployment.yaml
index d5461cf..3c93a25 100644
--- a/vllm/vllm-deployment.yaml
+++ b/vllm/vllm-deployment.yaml
@@ -16,7 +16,7 @@ spec:
       runtimeClassName: nvidia
       containers:
       - name: vllm-inference-server
-        image: vllm/vllm-openai:v0.3.3
-        imagePullPolicy: IfNotPresent
+        image: vllm/vllm-openai:latest
+        imagePullPolicy: Always
         resources:
 
@@ -33,7 +33,7 @@ spec:
         #args:
         #- while true; do sleep 2600; done
         command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
-        args: ["--model=mistralai/Mistral-7B-v0.1",
+        args: ["--model=openai-community/gpt2",
                "--gpu-memory-utilization=0.95",
                "--disable-log-requests",
                "--trust-remote-code",