fix(vllm): set default top_p with vllm (#1078)

**Description**

This PR fixes the vLLM backend when it receives a request with an empty top_p: an unset top_p now falls back to a default of 0.9 instead of being passed through as 0.
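
Why the guard is needed: a proto3 scalar field that the client omits arrives at the backend as `0`, and `top_p == 0` is not a usable nucleus-sampling value, so the backend substitutes `0.9` before building the sampling parameters. A minimal standalone sketch of the idea (the helper name `build_sampling_params` is illustrative and not part of the backend code):

```python
from vllm import SamplingParams

def build_sampling_params(temperature: float, top_p: float) -> SamplingParams:
    # Omitted proto3 numeric fields default to 0; top_p = 0 is not a valid
    # nucleus-sampling setting, so fall back to 0.9 before calling vLLM.
    if top_p == 0:
        top_p = 0.9
    return SamplingParams(temperature=temperature, top_p=top_p)
```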

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
@@ -49,11 +49,13 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
         return backend_pb2.Result(message="Model loaded successfully", success=True)
     def Predict(self, request, context):
+        if request.TopP == 0:
+            request.TopP = 0.9
         sampling_params = SamplingParams(temperature=request.Temperature, top_p=request.TopP)
         outputs = self.llm.generate([request.Prompt], sampling_params)
         generated_text = outputs[0].outputs[0].text
         # Remove prompt from response if present
         if request.Prompt in generated_text:
             generated_text = generated_text.replace(request.Prompt, "")