From 453e9c5da99c1ea7225f5a22a466b88b96574f3c Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Tue, 19 Sep 2023 18:10:23 +0200
Subject: [PATCH] fix(vllm): set default top_p with vllm (#1078)

**Description**

This PR fixes the vLLM backend when it is called with a request that has an empty top_p.

Signed-off-by: Ettore Di Giacinto
---
 extra/grpc/vllm/backend_vllm.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/extra/grpc/vllm/backend_vllm.py b/extra/grpc/vllm/backend_vllm.py
index 4b884f6f..a35cbc74 100644
--- a/extra/grpc/vllm/backend_vllm.py
+++ b/extra/grpc/vllm/backend_vllm.py
@@ -49,11 +49,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         return backend_pb2.Result(message="Model loaded successfully", success=True)

     def Predict(self, request, context):
+        if request.TopP == 0:
+            request.TopP = 0.9
+
         sampling_params = SamplingParams(temperature=request.Temperature, top_p=request.TopP)
         outputs = self.llm.generate([request.Prompt], sampling_params)
         generated_text = outputs[0].outputs[0].text
-
         # Remove prompt from response if present
         if request.Prompt in generated_text:
             generated_text = generated_text.replace(request.Prompt, "")
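
For context: proto3 scalar fields default to zero when a client omits them, so a missing `TopP` arrives in `Predict` as `0`, which `SamplingParams` does not treat as a usable top_p. Below is a minimal, self-contained sketch of the fallback behavior the patch introduces; `resolve_top_p`, `FakeRequest`, and `DEFAULT_TOP_P` are hypothetical names used for illustration and are not part of the patch or the LocalAI codebase.

```python
# Illustrative sketch of the top_p fallback applied in Predict.
# resolve_top_p, FakeRequest and DEFAULT_TOP_P are hypothetical names.
from dataclasses import dataclass

DEFAULT_TOP_P = 0.9  # value the patch falls back to when the field is unset


@dataclass
class FakeRequest:
    # Stand-in for the gRPC request message; proto3 scalars default to 0
    # when the client does not set them.
    Prompt: str = "Hello"
    Temperature: float = 0.7
    TopP: float = 0.0


def resolve_top_p(request) -> float:
    # Mirror the guard added in Predict: treat 0 as "not set" and fall back.
    return DEFAULT_TOP_P if request.TopP == 0 else request.TopP


if __name__ == "__main__":
    print(resolve_top_p(FakeRequest()))          # 0.9 (fallback applied)
    print(resolve_top_p(FakeRequest(TopP=0.5)))  # 0.5 (client value kept)
```

The patch applies the same check in place by mutating `request.TopP` before building `SamplingParams`, which keeps the change to a single early guard rather than threading a default through the rest of the handler.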