mirror of
https://github.com/mudler/LocalAI.git
synced 2024-06-07 19:40:48 +00:00
fix: exllama2 backend (#1484)
Signed-off-by: Sertac Ozercan <sozercan@gmail.com>
This commit is contained in:
parent
eaa899df63
commit
6597881854
@ -7,7 +7,8 @@ import backend_pb2_grpc
|
||||
import argparse
|
||||
import signal
|
||||
import sys
|
||||
import os, glob
|
||||
import os
|
||||
import glob
|
||||
|
||||
from pathlib import Path
|
||||
import torch
|
||||
@ -40,6 +41,7 @@ MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
||||
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
def Health(self, request, context):
|
||||
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
||||
|
||||
def LoadModel(self, request, context):
|
||||
try:
|
||||
model_directory = request.ModelFile
|
||||
@ -85,13 +87,14 @@ class BackendServicer(backend_pb2_grpc.BackendServicer):
|
||||
|
||||
if request.Tokens != 0:
|
||||
tokens = request.Tokens
|
||||
output = self.generator.generate_simple(request.Prompt, settings, tokens, seed = self.seed)
|
||||
output = self.generator.generate_simple(
|
||||
request.Prompt, settings, tokens)
|
||||
|
||||
# Remove prompt from response if present
|
||||
if request.Prompt in output:
|
||||
output = output.replace(request.Prompt, "")
|
||||
|
||||
return backend_pb2.Result(message=bytes(t, encoding='utf-8'))
|
||||
return backend_pb2.Result(message=bytes(output, encoding='utf-8'))
|
||||
|
||||
def PredictStream(self, request, context):
|
||||
# Implement PredictStream RPC
|
||||
@ -124,6 +127,7 @@ def serve(address):
|
||||
except KeyboardInterrupt:
|
||||
server.stop(0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Run the gRPC server.")
|
||||
parser.add_argument(
|
||||
|
Loading…
Reference in New Issue
Block a user