2023-08-07 20:39:10 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
from concurrent import futures
|
|
|
|
import argparse
|
|
|
|
import signal
|
|
|
|
import sys
|
|
|
|
import os
|
feat(conda): conda environments (#1144)
* feat(autogptq): add a separate conda environment for autogptq (#1137)
**Description**
This PR related to #1117
**Notes for Reviewers**
Here we lock down the version of the dependencies. Make sure it can be
used all the time without failed if the version of dependencies were
upgraded.
I change the order of importing packages according to the pylint, and no
change the logic of code. It should be ok.
I will do more investigate on writing some test cases for every backend.
I can run the service in my environment, but there is not exist a way to
test it. So, I am not confident on it.
Add a README.md in the `grpc` root. This is the common commands for
creating `conda` environment. And it can be used to the reference file
for creating extral gRPC backend document.
Signed-off-by: GitHub <noreply@github.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* [Extra backend] Add seperate environment for ttsbark (#1141)
**Description**
This PR relates to #1117
**Notes for Reviewers**
Same to the latest PR:
* The code is also changed, but only the order of the import package
parts. And some code comments are also added.
* Add a configuration of the `conda` environment
* Add a simple test case for testing if the service can be startup in
current `conda` environment. It is succeed in VSCode, but the it is not
out of box on terminal. So, it is hard to say the test case really
useful.
**[Signed
commits](../CONTRIBUTING.md#signing-off-on-commits-developer-certificate-of-origin)**
- [x] Yes, I signed my commits.
<!--
Thank you for contributing to LocalAI!
Contributing Conventions
-------------------------
The draft above helps to give a quick overview of your PR.
Remember to remove this comment and to at least:
1. Include descriptive PR titles with [<component-name>] prepended. We
use [conventional
commits](https://www.conventionalcommits.org/en/v1.0.0/).
2. Build and test your changes before submitting a PR (`make build`).
3. Sign your commits
4. **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below).
5. **X/Twitter handle:** we announce bigger features on X/Twitter. If
your PR gets announced, and you'd like a mention, we'll gladly shout you
out!
By following the community's contribution conventions upfront, the
review process will
be accelerated and your PR merged more quickly.
If no one reviews your PR within a few days, please @-mention @mudler.
-->
Signed-off-by: GitHub <noreply@github.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* feat(conda): add make target and entrypoints for the dockerfile
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* feat(conda): Add seperate conda env for diffusers (#1145)
**Description**
This PR relates to #1117
**Notes for Reviewers**
* Add `conda` env `diffusers.yml`
* Add Makefile to create it automatically
* Add `run.sh` to support running as a extra backend
* Also adding it to the main Dockerfile
* Add make command in the root Makefile
* Testing the server, it can start up under the env
Signed-off-by: GitHub <noreply@github.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* feat(conda):Add seperate env for vllm (#1148)
**Description**
This PR is related to #1117
**Notes for Reviewers**
* The gRPC server can be started as normal
* The test case can be triggered in VSCode
* Same to other this kind of PRs, add `vllm.yml` Makefile and add
`run.sh` to the main Dockerfile, and command to the main Makefile
**[Signed
commits](../CONTRIBUTING.md#signing-off-on-commits-developer-certificate-of-origin)**
- [x] Yes, I signed my commits.
<!--
Thank you for contributing to LocalAI!
Contributing Conventions
-------------------------
The draft above helps to give a quick overview of your PR.
Remember to remove this comment and to at least:
1. Include descriptive PR titles with [<component-name>] prepended. We
use [conventional
commits](https://www.conventionalcommits.org/en/v1.0.0/).
2. Build and test your changes before submitting a PR (`make build`).
3. Sign your commits
4. **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below).
5. **X/Twitter handle:** we announce bigger features on X/Twitter. If
your PR gets announced, and you'd like a mention, we'll gladly shout you
out!
By following the community's contribution conventions upfront, the
review process will
be accelerated and your PR merged more quickly.
If no one reviews your PR within a few days, please @-mention @mudler.
-->
Signed-off-by: GitHub <noreply@github.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* feat(conda):Add seperate env for huggingface (#1146)
**Description**
This PR is related to #1117
**Notes for Reviewers**
* Add conda env `huggingface.yml`
* Change the import order, and also remove the no-used packages
* Add `run.sh` and `make command` to the main Dockerfile and Makefile
* Add test cases for it. It can be triggered and succeed under VSCode
Python extension but it is hang by using `python -m unites
test_huggingface.py` in the terminal
```
Running tests (unittest): /workspaces/LocalAI/extra/grpc/huggingface
Running tests: /workspaces/LocalAI/extra/grpc/huggingface/test_huggingface.py::TestBackendServicer::test_embedding
/workspaces/LocalAI/extra/grpc/huggingface/test_huggingface.py::TestBackendServicer::test_load_model
/workspaces/LocalAI/extra/grpc/huggingface/test_huggingface.py::TestBackendServicer::test_server_startup
./test_huggingface.py::TestBackendServicer::test_embedding Passed
./test_huggingface.py::TestBackendServicer::test_load_model Passed
./test_huggingface.py::TestBackendServicer::test_server_startup Passed
Total number of tests expected to run: 3
Total number of tests run: 3
Total number of tests passed: 3
Total number of tests failed: 0
Total number of tests failed with errors: 0
Total number of tests skipped: 0
Finished running tests!
```
**[Signed
commits](../CONTRIBUTING.md#signing-off-on-commits-developer-certificate-of-origin)**
- [x] Yes, I signed my commits.
<!--
Thank you for contributing to LocalAI!
Contributing Conventions
-------------------------
The draft above helps to give a quick overview of your PR.
Remember to remove this comment and to at least:
1. Include descriptive PR titles with [<component-name>] prepended. We
use [conventional
commits](https://www.conventionalcommits.org/en/v1.0.0/).
2. Build and test your changes before submitting a PR (`make build`).
3. Sign your commits
4. **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below).
5. **X/Twitter handle:** we announce bigger features on X/Twitter. If
your PR gets announced, and you'd like a mention, we'll gladly shout you
out!
By following the community's contribution conventions upfront, the
review process will
be accelerated and your PR merged more quickly.
If no one reviews your PR within a few days, please @-mention @mudler.
-->
Signed-off-by: GitHub <noreply@github.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* feat(conda): Add the seperate conda env for VALL-E X (#1147)
**Description**
This PR is related to #1117
**Notes for Reviewers**
* The gRPC server cannot start up
```
(ttsvalle) @Aisuko ➜ /workspaces/LocalAI (feat/vall-e-x) $ /opt/conda/envs/ttsvalle/bin/python /workspaces/LocalAI/extra/grpc/vall-e-x/ttsvalle.py
Traceback (most recent call last):
File "/workspaces/LocalAI/extra/grpc/vall-e-x/ttsvalle.py", line 14, in <module>
from utils.generation import SAMPLE_RATE, generate_audio, preload_models
ModuleNotFoundError: No module named 'utils'
```
The installation steps follow
https://github.com/Plachtaa/VALL-E-X#-installation below:
* Under the `ttsvalle` conda env
```
git clone https://github.com/Plachtaa/VALL-E-X.git
cd VALL-E-X
pip install -r requirements.txt
```
**[Signed
commits](../CONTRIBUTING.md#signing-off-on-commits-developer-certificate-of-origin)**
- [x] Yes, I signed my commits.
<!--
Thank you for contributing to LocalAI!
Contributing Conventions
-------------------------
The draft above helps to give a quick overview of your PR.
Remember to remove this comment and to at least:
1. Include descriptive PR titles with [<component-name>] prepended. We
use [conventional
commits](https://www.conventionalcommits.org/en/v1.0.0/).
2. Build and test your changes before submitting a PR (`make build`).
3. Sign your commits
4. **Tag maintainer:** for a quicker response, tag the relevant
maintainer (see below).
5. **X/Twitter handle:** we announce bigger features on X/Twitter. If
your PR gets announced, and you'd like a mention, we'll gladly shout you
out!
By following the community's contribution conventions upfront, the
review process will
be accelerated and your PR merged more quickly.
If no one reviews your PR within a few days, please @-mention @mudler.
-->
Signed-off-by: GitHub <noreply@github.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* fix: set image type
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* feat(conda):Add seperate conda env for exllama (#1149)
Add seperate env for exllama
Signed-off-by: Aisuko <urakiny@gmail.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Setup conda
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Set image_type arg
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* ci: prepare only conda env in tests
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Dockerfile: comment manual pip calls
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* conda: add conda to PATH
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* fixes
* add shebang
* Fixups
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* file perms
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* debug
* Install new conda in the worker
* Disable GPU tests for now until the worker is back
* Rename workflows
* debug
* Fixup conda install
* fixup(wrapper): pass args
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: GitHub <noreply@github.com>
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: Aisuko <urakiny@gmail.com>
Signed-off-by: Ettore Di Giacinto <mudler@users.noreply.github.com>
Co-authored-by: Aisuko <urakiny@gmail.com>
2023-11-04 14:30:32 +00:00
|
|
|
import time
|
|
|
|
|
|
|
|
import grpc
|
|
|
|
import backend_pb2
|
|
|
|
import backend_pb2_grpc
|
|
|
|
from auto_gptq import AutoGPTQForCausalLM
|
2023-08-07 20:39:10 +00:00
|
|
|
from transformers import AutoTokenizer
|
|
|
|
from transformers import TextGenerationPipeline
|
|
|
|
|
|
|
|
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
|
|
|
|
|
2023-09-19 19:30:39 +00:00
|
|
|
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
|
|
|
|
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
|
|
|
|
|
2023-08-07 20:39:10 +00:00
|
|
|
# Implement the BackendServicer class with the service methods
|
|
|
|
class BackendServicer(backend_pb2_grpc.BackendServicer):
|
|
|
|
def Health(self, request, context):
|
|
|
|
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
|
|
|
|
def LoadModel(self, request, context):
|
|
|
|
try:
|
|
|
|
device = "cuda:0"
|
|
|
|
if request.Device != "":
|
|
|
|
device = request.Device
|
|
|
|
|
2023-08-07 23:10:05 +00:00
|
|
|
tokenizer = AutoTokenizer.from_pretrained(request.Model, use_fast=request.UseFastTokenizer)
|
2023-08-07 20:39:10 +00:00
|
|
|
|
|
|
|
model = AutoGPTQForCausalLM.from_quantized(request.Model,
|
|
|
|
model_basename=request.ModelBaseName,
|
|
|
|
use_safetensors=True,
|
2024-03-05 18:47:15 +00:00
|
|
|
trust_remote_code=request.TrustRemoteCode,
|
2023-08-07 20:39:10 +00:00
|
|
|
device=device,
|
|
|
|
use_triton=request.UseTriton,
|
|
|
|
quantize_config=None)
|
|
|
|
|
|
|
|
self.model = model
|
|
|
|
self.tokenizer = tokenizer
|
|
|
|
except Exception as err:
|
|
|
|
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
|
|
|
|
return backend_pb2.Result(message="Model loaded successfully", success=True)
|
|
|
|
|
|
|
|
def Predict(self, request, context):
|
2023-08-07 23:10:05 +00:00
|
|
|
penalty = 1.0
|
|
|
|
if request.Penalty != 0.0:
|
|
|
|
penalty = request.Penalty
|
|
|
|
tokens = 512
|
|
|
|
if request.Tokens != 0:
|
|
|
|
tokens = request.Tokens
|
|
|
|
top_p = 0.95
|
|
|
|
if request.TopP != 0.0:
|
|
|
|
top_p = request.TopP
|
|
|
|
|
2023-08-07 20:39:10 +00:00
|
|
|
# Implement Predict RPC
|
|
|
|
pipeline = TextGenerationPipeline(
|
|
|
|
model=self.model,
|
|
|
|
tokenizer=self.tokenizer,
|
2023-08-07 23:10:05 +00:00
|
|
|
max_new_tokens=tokens,
|
2023-08-07 20:39:10 +00:00
|
|
|
temperature=request.Temperature,
|
2023-08-07 23:10:05 +00:00
|
|
|
top_p=top_p,
|
|
|
|
repetition_penalty=penalty,
|
2023-08-07 20:39:10 +00:00
|
|
|
)
|
2023-08-07 23:27:38 +00:00
|
|
|
t = pipeline(request.Prompt)[0]["generated_text"]
|
|
|
|
# Remove prompt from response if present
|
|
|
|
if request.Prompt in t:
|
|
|
|
t = t.replace(request.Prompt, "")
|
|
|
|
|
|
|
|
return backend_pb2.Result(message=bytes(t, encoding='utf-8'))
|
2023-08-07 20:39:10 +00:00
|
|
|
|
|
|
|
def PredictStream(self, request, context):
|
|
|
|
# Implement PredictStream RPC
|
|
|
|
#for reply in some_data_generator():
|
|
|
|
# yield reply
|
|
|
|
# Not implemented yet
|
|
|
|
return self.Predict(request, context)
|
|
|
|
|
|
|
|
|
|
|
|
def serve(address):
|
2023-09-19 19:30:39 +00:00
|
|
|
server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
|
2023-08-07 20:39:10 +00:00
|
|
|
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
|
|
|
|
server.add_insecure_port(address)
|
|
|
|
server.start()
|
|
|
|
print("Server started. Listening on: " + address, file=sys.stderr)
|
|
|
|
|
|
|
|
# Define the signal handler function
|
|
|
|
def signal_handler(sig, frame):
|
|
|
|
print("Received termination signal. Shutting down...")
|
|
|
|
server.stop(0)
|
|
|
|
sys.exit(0)
|
|
|
|
|
|
|
|
# Set the signal handlers for SIGINT and SIGTERM
|
|
|
|
signal.signal(signal.SIGINT, signal_handler)
|
|
|
|
signal.signal(signal.SIGTERM, signal_handler)
|
|
|
|
|
|
|
|
try:
|
|
|
|
while True:
|
|
|
|
time.sleep(_ONE_DAY_IN_SECONDS)
|
|
|
|
except KeyboardInterrupt:
|
|
|
|
server.stop(0)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
parser = argparse.ArgumentParser(description="Run the gRPC server.")
|
|
|
|
parser.add_argument(
|
|
|
|
"--addr", default="localhost:50051", help="The address to bind the server to."
|
|
|
|
)
|
|
|
|
args = parser.parse_args()
|
|
|
|
|
|
|
|
serve(args.addr)
|