From f8cea16c03a7b175e205f61649d2e80e3ea04a13 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 24 May 2024 23:52:13 +0200 Subject: [PATCH 01/80] :arrow_up: Update ggerganov/llama.cpp (#2399) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b4ced7e9..ee58dcbe 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=74f33adf5f8b20b08fc5a6aa17ce081abe86ef2f +CPPLLAMA_VERSION?=d041d2ceaaf50e058622d92921b3e680ffa4e9e7 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 29615576fbb07465265a9f2297d624979868eed7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Serta=C3=A7=20=C3=96zercan?= <852750+sozercan@users.noreply.github.com> Date: Sat, 25 May 2024 00:33:50 -0700 Subject: [PATCH 02/80] ci: fix sd release (#2400) Signed-off-by: Sertac Ozercan --- .github/workflows/release.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 330b2559..7c7f7742 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -100,6 +100,12 @@ jobs: with: name: stablediffusion path: release/ + - name: Release + uses: softprops/action-gh-release@v2 + if: startsWith(github.ref, 'refs/tags/') + with: + files: | + release/* build-macOS-arm64: runs-on: macos-14 From e1d6b706f4b8e4499f13c6dcfbdf9ccfbbe20718 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 25 May 2024 10:08:23 +0200 Subject: [PATCH 03/80] Update quickstart.md (#2404) Signed-off-by: Ettore Di Giacinto --- docs/content/docs/getting-started/quickstart.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md index 0c964eb0..1bba42fb 100644 --- a/docs/content/docs/getting-started/quickstart.md +++ b/docs/content/docs/getting-started/quickstart.md @@ -123,9 +123,7 @@ You can check out the releases in https://github.com/mudler/LocalAI/releases. 
| OS | Link | | --- | --- | -| Linux (CUDA 11) | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-cuda11-Linux-x86_64) | -| Linux (CUDA 12) | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-cuda12-Linux-x86_64) | -| Linux (No GPU) | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-Linux-x86_64) | +| Linux | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-Linux-x86_64) | | MacOS | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-Darwin-arm64) | From 663488b6bd3f2086dafbcbef9843019a36d1d7b1 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 25 May 2024 10:08:35 +0200 Subject: [PATCH 04/80] :arrow_up: Update docs version mudler/LocalAI (#2398) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index 6991ef2f..d4af2be3 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.15.0" + "version": "v2.16.0" } From 003b43f6fc4844cbf495d22438c85e742d130fdc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 25 May 2024 10:18:20 +0200 Subject: [PATCH 05/80] Update quickstart.md Signed-off-by: Ettore Di Giacinto --- docs/content/docs/getting-started/quickstart.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md index 1bba42fb..f92303e0 100644 --- a/docs/content/docs/getting-started/quickstart.md +++ b/docs/content/docs/getting-started/quickstart.md @@ -114,12 +114,17 @@ docker run -p 8080:8080 --name local-ai -ti -v localai-models:/build/models loca {{% /alert %}} -## From binary +## Running LocalAI from Binaries -LocalAI is available as a standalone binary as well. Binaries are compiled for Linux and MacOS and automatically uploaded in the Github releases. Windows is known to work with WSL. +LocalAI binaries are available for both Linux and MacOS platforms and can be executed directly from your command line. These binaries are continuously updated and hosted on [our GitHub Releases page](https://github.com/mudler/LocalAI/releases). This method also supports Windows users via the Windows Subsystem for Linux (WSL). -You can check out the releases in https://github.com/mudler/LocalAI/releases. 
+Use the following one-liner command in your terminal to download and run LocalAI on Linux or MacOS: +```bash +curl -Lo local-ai "https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-$(uname -s)-$(uname -m)" && chmod +x local-ai && ./local-ai +``` + +Otherwise, here are the links to the binaries: | OS | Link | | --- | --- | From 785c54e7b0c7824762ac4f025f2da0cfdd1eacf1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 25 May 2024 16:11:01 +0200 Subject: [PATCH 06/80] models(gallery): add Mirai Nova (#2405) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index a38a78e1..b43aced1 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -57,6 +57,25 @@ - filename: LocalAI-Llama3-8b-Function-Call-v0.2-q4_k_m.bin sha256: 7e46405ce043cbc8d30f83f26a5655dc8edf5e947b748d7ba2745bd0af057a41 uri: huggingface://mudler/LocalAI-Llama3-8b-Function-Call-v0.2-GGUF/LocalAI-Llama3-8b-Function-Call-v0.2-q4_k_m.bin +- !!merge <<: *mudler + icon: "https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/SKuXcvmZ_6oD4NCMkvyGo.png" + name: "mirai-nova-llama3-LocalAI-8b-v0.1" + urls: + - https://huggingface.co/mudler/Mirai-Nova-Llama3-LocalAI-8B-v0.1-GGUF + - https://huggingface.co/mudler/Mirai-Nova-Llama3-LocalAI-8B-v0.1 + description: | + Mirai Nova: "Mirai" means future in Japanese, and "Nova" references a star showing a sudden large increase in brightness. + + A set of models oriented in function calling, but generalist and with enhanced reasoning capability. This is fine tuned with Llama3. + + Mirai Nova works particularly well with LocalAI, leveraging the function call with grammars feature out of the box. + overrides: + parameters: + model: Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin + files: + - filename: Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin + sha256: 579cbb229f9c11d0330759ff4733102d2491615a4c61289e26c09d1b3a583fec + uri: huggingface://mudler/Mirai-Nova-Llama3-LocalAI-8B-v0.1-GGUF/Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin - &parler-tts ### START parler-tts url: "github:mudler/LocalAI/gallery/parler-tts.yaml@master" From bb3ec56de3231354ec6a3e9b368f7fe4017385a2 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 25 May 2024 16:11:59 +0200 Subject: [PATCH 07/80] docs: add distributed inferencing docs Signed-off-by: Ettore Di Giacinto --- README.md | 5 +- docs/content/docs/advanced/advanced-usage.md | 2 + .../docs/features/distributed_inferencing.md | 101 ++++++++++++++++++ docs/content/docs/features/reranker.md | 2 +- docs/content/docs/overview.md | 3 +- 5 files changed, 109 insertions(+), 4 deletions(-) create mode 100644 docs/content/docs/features/distributed_inferencing.md diff --git a/README.md b/README.md index 377df0d2..a4479258 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) -- 🔥🔥 Decentralized llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) +- 🔥🔥 Decentralized llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 
👉 Docs https://localai.io/features/distribute/ - 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334 - 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328 - 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324 @@ -94,7 +94,8 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/) - 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/) - 🥽 [Vision API](https://localai.io/features/gpt-vision/) -- 🆕 [Reranker API](https://localai.io/features/reranker/) +- 📈 [Reranker API](https://localai.io/features/reranker/) +- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/) ## 💻 Usage diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md index 085606e5..40d7d0fc 100644 --- a/docs/content/docs/advanced/advanced-usage.md +++ b/docs/content/docs/advanced/advanced-usage.md @@ -370,6 +370,8 @@ there are additional environment variables available that modify the behavior of | `GO_TAGS` | | Go tags. Available: `stablediffusion` | | `HUGGINGFACEHUB_API_TOKEN` | | Special token for interacting with HuggingFace Inference API, required only when using the `langchain-huggingface` backend | | `EXTRA_BACKENDS` | | A space separated list of backends to prepare. For example `EXTRA_BACKENDS="backend/python/diffusers backend/python/transformers"` prepares the conda environment on start | +| `DISABLE_AUTODETECT` | `false` | Disable autodetect of CPU flagset on start | +| `LLAMACPP_GRPC_SERVERS` | | A list of llama.cpp workers to distribute the workload. For example `LLAMACPP_GRPC_SERVERS="address1:port,address2:port"` | Here is how to configure these variables: diff --git a/docs/content/docs/features/distributed_inferencing.md b/docs/content/docs/features/distributed_inferencing.md new file mode 100644 index 00000000..746616f9 --- /dev/null +++ b/docs/content/docs/features/distributed_inferencing.md @@ -0,0 +1,101 @@ ++++ +disableToc = false +title = "✍️ Distributed inferencing" +weight = 15 +url = "/features/distribute/" ++++ + +{{% alert note %}} +This feature is available only with llama-cpp compatible models. + +This feature has landed with https://github.com/mudler/LocalAI/pull/2324 and is based on the upstream work in https://github.com/ggerganov/llama.cpp/pull/6829. +{{% /alert %}} + +This feature allows LocalAI to manage the requests while the workload is distributed among workers. + +## Usage + +### Start workers + +To start workers to offload the computation you can run: + +``` +local-ai llamacpp-worker +``` + +However, you can also follow the llama.cpp README and building the rpc-server (https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md), which is still compatible with LocalAI. + +### Start LocalAI + +When starting the LocalAI server, which is going to accept the API requests, you can set a list of workers IP/address by specifying the addresses with the `LLAMACPP_GRPC_SERVERS` environment variable, for example: + +```bash +LLAMACPP_GRPC_SERVERS="address1:port,address2:port" local-ai run +``` + +At this point the workload hitting in the LocalAI server should be distributed across the nodes! 
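
To make this concrete, here is a minimal sketch of a two-worker setup. The worker addresses and ports are hypothetical placeholders; the sketch assumes each host is already running a worker as described above, and uses only the `LLAMACPP_GRPC_SERVERS` variable and `local-ai run` command shown earlier:

```bash
# Workers (started as shown above) are assumed reachable at these
# hypothetical addresses:
#   192.168.1.10:50052
#   192.168.1.11:50052

# On the API host, list both workers so the workload is split across them:
LLAMACPP_GRPC_SERVERS="192.168.1.10:50052,192.168.1.11:50052" local-ai run
```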
+ +## Peer to peer + +![output](https://github.com/mudler/LocalAI/assets/2420543/8ca277cf-c208-4562-8929-808b2324b584) + +The workers can also be connected to each other, creating a peer to peer network, where the workload is distributed among the workers, in a private, decentralized network. + +A shared token between the server and the workers is needed to let the communication happen via the p2p network. This feature supports both local network (with mdns discovery) and dht for communicating also behind different networks. + +The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token). + +A network is established between the server and the workers with dht and mdns discovery protocols, the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on. + +When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally. Then llama.cpp is configured to use the services. If you are interested in how it works behind the scenes, see the PR: https://github.com/mudler/LocalAI/pull/2343. + + +### Usage + +1. Start the server with `--p2p`: + +```bash +./local-ai run --p2p +# 1:02AM INF loading environment variables from file envFile=.env +# 1:02AM INF Setting logging to info +# 1:02AM INF P2P mode enabled +# 1:02AM INF No token provided, generating one +# 1:02AM INF Generated Token: +# XXXXXXXXXXX +# 1:02AM INF Press a button to proceed +``` + +A token is displayed, copy it and press enter. + +You can re-use the same token later restarting the server with `--p2ptoken` (or `P2P_TOKEN`). + +2. Start the workers. Now you can copy the local-ai binary in other hosts, and run as many workers with that token: + +```bash +TOKEN=XXX ./local-ai p2p-llama-cpp-rpc +# 1:06AM INF loading environment variables from file envFile=.env +# 1:06AM INF Setting logging to info +# {"level":"INFO","time":"2024-05-19T01:06:01.794+0200","caller":"config/config.go:288","message":"connmanager disabled\n"} +# {"level":"INFO","time":"2024-05-19T01:06:01.794+0200","caller":"config/config.go:295","message":" go-libp2p resource manager protection enabled"} +# {"level":"INFO","time":"2024-05-19T01:06:01.794+0200","caller":"config/config.go:409","message":"max connections: 100\n"} +# 1:06AM INF Starting llama-cpp-rpc-server on '127.0.0.1:34371' +# {"level":"INFO","time":"2024-05-19T01:06:01.794+0200","caller":"node/node.go:118","message":" Starting EdgeVPN network"} +# create_backend: using CPU backend +# Starting RPC server on 127.0.0.1:34371, backend memory: 31913 MB +# 2024/05/19 01:06:01 failed to sufficiently increase receive buffer size (was: 208 kiB, wanted: 2048 kiB, got: 416 kiB). # See https://github.com/quic-go/quic-go/wiki/UDP-Buffer-Sizes for details. 
+# {"level":"INFO","time":"2024-05-19T01:06:01.805+0200","caller":"node/node.go:172","message":" Node ID: 12D3KooWJ7WQAbCWKfJgjw2oMMGGss9diw3Sov5hVWi8t4DMgx92"} +# {"level":"INFO","time":"2024-05-19T01:06:01.806+0200","caller":"node/node.go:173","message":" Node Addresses: [/ip4/127.0.0.1/tcp/44931 /ip4/127.0.0.1/udp/33251/quic-v1/webtransport/certhash/uEiAWAhZ-W9yx2ZHnKQm3BE_ft5jjoc468z5-Rgr9XdfjeQ/certhash/uEiB8Uwn0M2TQBELaV2m4lqypIAY2S-2ZMf7lt_N5LS6ojw /ip4/127.0.0.1/udp/35660/quic-v1 /ip4/192.168.68.110/tcp/44931 /ip4/192.168.68.110/udp/33251/quic-v1/webtransport/certhash/uEiAWAhZ-W9yx2ZHnKQm3BE_ft5jjoc468z5-Rgr9XdfjeQ/certhash/uEiB8Uwn0M2TQBELaV2m4lqypIAY2S-2ZMf7lt_N5LS6ojw /ip4/192.168.68.110/udp/35660/quic-v1 /ip6/::1/tcp/41289 /ip6/::1/udp/33160/quic-v1/webtransport/certhash/uEiAWAhZ-W9yx2ZHnKQm3BE_ft5jjoc468z5-Rgr9XdfjeQ/certhash/uEiB8Uwn0M2TQBELaV2m4lqypIAY2S-2ZMf7lt_N5LS6ojw /ip6/::1/udp/35701/quic-v1]"} +# {"level":"INFO","time":"2024-05-19T01:06:01.806+0200","caller":"discovery/dht.go:104","message":" Bootstrapping DHT"} +``` + +(Note you can also supply the token via args) + +At this point, you should see in the server logs messages stating that new workers are found + +3. Now you can start doing inference as usual on the server (the node used on step 1) + + +## Notes + +- Only single model is supported for now +- Make sure that the server sees new workers after usage starts - currently, if you start the inference you can't add other workers later on. \ No newline at end of file diff --git a/docs/content/docs/features/reranker.md b/docs/content/docs/features/reranker.md index 92c406df..4bc01a7f 100644 --- a/docs/content/docs/features/reranker.md +++ b/docs/content/docs/features/reranker.md @@ -1,7 +1,7 @@ +++ disableToc = false -title = " Reranker" +title = "📈 Reranker" weight = 11 url = "/features/reranker/" +++ diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md index 15086f6f..beadfbd3 100644 --- a/docs/content/docs/overview.md +++ b/docs/content/docs/overview.md @@ -101,7 +101,8 @@ Note that this started just as a fun weekend project by [mudler](https://github. 
 - 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
 - 🥽 [Vision API](https://localai.io/features/gpt-vision/)
 - 💾 [Stores](https://localai.io/stores)
-- 🆕 [Reranker](https://localai.io/features/reranker/)
+- 📈 [Reranker](https://localai.io/features/reranker/)
+- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
 
 ## Contribute and help
 

From e25fc656c97e4f63cecfc81c35cfb2c9891ef62f Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 25 May 2024 16:13:04 +0200
Subject: [PATCH 08/80] Update README.md

Signed-off-by: Ettore Di Giacinto
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index a4479258..dc0ba70e 100644
--- a/README.md
+++ b/README.md
@@ -89,7 +89,7 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
 - 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
 - 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
 - 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
-- 🔥 [OpenAI functions](https://localai.io/features/openai-functions/) 🆕
+- 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/)
 - 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
 - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)

From 785adc1ed5cb623dc9d1dde07061c4e2ddaf0fad Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 25 May 2024 16:13:44 +0200
Subject: [PATCH 09/80] docs: update title

Signed-off-by: Ettore Di Giacinto
---
 docs/content/docs/features/distributed_inferencing.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/content/docs/features/distributed_inferencing.md b/docs/content/docs/features/distributed_inferencing.md
index 746616f9..8a4cc545 100644
--- a/docs/content/docs/features/distributed_inferencing.md
+++ b/docs/content/docs/features/distributed_inferencing.md
@@ -1,6 +1,6 @@
 +++
 disableToc = false
-title = "✍️ Distributed inferencing"
+title = "🆕🖧 Distributed inferencing"
 weight = 15
 url = "/features/distribute/"
 +++

From fc3502b56f0d69be7e514a32ec22814d95c66915 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 25 May 2024 20:17:04 +0200
Subject: [PATCH 10/80] docs: rewording

Signed-off-by: Ettore Di Giacinto
---
 .../docs/features/distributed_inferencing.md | 56 +++++++++----------
 1 file changed, 27 insertions(+), 29 deletions(-)

diff --git a/docs/content/docs/features/distributed_inferencing.md b/docs/content/docs/features/distributed_inferencing.md
index 8a4cc545..b3b84528 100644
--- a/docs/content/docs/features/distributed_inferencing.md
+++ b/docs/content/docs/features/distributed_inferencing.md
@@ -1,54 +1,53 @@
 +++
 disableToc = false
-title = "🆕🖧 Distributed inferencing"
+title = "🆕🖧 Distributed Inference"
 weight = 15
 url = "/features/distribute/"
 +++
 
 {{% alert note %}}
-This feature is available only with llama-cpp compatible models.
+This feature is available exclusively with llama-cpp compatible models.
 
-This feature has landed with https://github.com/mudler/LocalAI/pull/2324 and is based on the upstream work in https://github.com/ggerganov/llama.cpp/pull/6829.
+This feature was introduced in [LocalAI pull request #2324](https://github.com/mudler/LocalAI/pull/2324) and is based on the upstream work in [llama.cpp pull request #6829](https://github.com/ggerganov/llama.cpp/pull/6829). {{% /alert %}} -This feature allows LocalAI to manage the requests while the workload is distributed among workers. +This functionality enables LocalAI to distribute inference requests across multiple worker nodes, improving efficiency and performance. ## Usage -### Start workers +### Starting Workers -To start workers to offload the computation you can run: +To start workers for distributing the computational load, run: -``` +```bash local-ai llamacpp-worker ``` -However, you can also follow the llama.cpp README and building the rpc-server (https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md), which is still compatible with LocalAI. +Alternatively, you can build the RPC server following the llama.cpp [README](https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md), which is compatible with LocalAI. -### Start LocalAI +### Starting LocalAI -When starting the LocalAI server, which is going to accept the API requests, you can set a list of workers IP/address by specifying the addresses with the `LLAMACPP_GRPC_SERVERS` environment variable, for example: +To start the LocalAI server, which handles API requests, specify the worker addresses using the `LLAMACPP_GRPC_SERVERS` environment variable: ```bash LLAMACPP_GRPC_SERVERS="address1:port,address2:port" local-ai run ``` -At this point the workload hitting in the LocalAI server should be distributed across the nodes! +The workload on the LocalAI server will then be distributed across the specified nodes. -## Peer to peer +## Peer-to-Peer Networking ![output](https://github.com/mudler/LocalAI/assets/2420543/8ca277cf-c208-4562-8929-808b2324b584) -The workers can also be connected to each other, creating a peer to peer network, where the workload is distributed among the workers, in a private, decentralized network. +Workers can also connect to each other in a peer-to-peer network, distributing the workload in a decentralized manner. -A shared token between the server and the workers is needed to let the communication happen via the p2p network. This feature supports both local network (with mdns discovery) and dht for communicating also behind different networks. +A shared token between the server and the workers is required for communication within the peer-to-peer network. This feature supports both local network (using mDNS discovery) and DHT for communication across different networks. -The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token). +The token is automatically generated when starting the server with the `--p2p` flag. Workers can be started with the token using `local-ai worker p2p-llama-cpp-rpc` and specifying the token via the environment variable `TOKEN` or with the `--token` argument. -A network is established between the server and the workers with dht and mdns discovery protocols, the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on. - -When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally. 
Then llama.cpp is configured to use the services. If you are interested in how it works behind the scenes, see the PR: https://github.com/mudler/LocalAI/pull/2343. +A network is established between the server and workers using DHT and mDNS discovery protocols. The llama.cpp RPC server is automatically started and exposed to the peer-to-peer network, allowing the API server to connect. +When the HTTP server starts, it discovers workers in the network and creates port forwards to the local service. Llama.cpp is configured to use these services. For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343). ### Usage @@ -65,14 +64,14 @@ When the HTTP server is started, it will discover the workers in the network and # 1:02AM INF Press a button to proceed ``` -A token is displayed, copy it and press enter. +Copy the displayed token and press Enter. -You can re-use the same token later restarting the server with `--p2ptoken` (or `P2P_TOKEN`). +To reuse the same token later, restart the server with `--p2ptoken` or `P2P_TOKEN`. -2. Start the workers. Now you can copy the local-ai binary in other hosts, and run as many workers with that token: +2. Start the workers. Copy the `local-ai` binary to other hosts and run as many workers as needed using the token: ```bash -TOKEN=XXX ./local-ai p2p-llama-cpp-rpc +TOKEN=XXX ./local-ai p2p-llama-cpp-rpc # 1:06AM INF loading environment variables from file envFile=.env # 1:06AM INF Setting logging to info # {"level":"INFO","time":"2024-05-19T01:06:01.794+0200","caller":"config/config.go:288","message":"connmanager disabled\n"} @@ -88,14 +87,13 @@ TOKEN=XXX ./local-ai p2p-llama-cpp-rpc # {"level":"INFO","time":"2024-05-19T01:06:01.806+0200","caller":"discovery/dht.go:104","message":" Bootstrapping DHT"} ``` -(Note you can also supply the token via args) +(Note: You can also supply the token via command-line arguments) -At this point, you should see in the server logs messages stating that new workers are found +The server logs should indicate that new workers are being discovered. -3. Now you can start doing inference as usual on the server (the node used on step 1) +3. Start inference as usual on the server initiated in step 1. +## Notes -## Notes - -- Only single model is supported for now -- Make sure that the server sees new workers after usage starts - currently, if you start the inference you can't add other workers later on. \ No newline at end of file +- Only a single model is supported currently. +- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun. 
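+
+As a closing sketch of step 3 above: once workers are attached, requests go to the server's usual OpenAI-compatible endpoint. The model name below is a placeholder for whatever model is installed.
+
+```bash
+# Standard chat-completion request against the server started with --p2p;
+# "model-name" is a placeholder for an installed model.
+curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+  "model": "model-name",
+  "messages": [{"role": "user", "content": "Hello, how are you?"}]
+}'
+```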
\ No newline at end of file From b90cdced5934fe85f48f7f9942cfbd6f781174e6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 25 May 2024 20:18:25 +0200 Subject: [PATCH 11/80] docs: rewording Signed-off-by: Ettore Di Giacinto --- .../docs/features/constrained_grammars.md | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/docs/content/docs/features/constrained_grammars.md b/docs/content/docs/features/constrained_grammars.md index 9aa9279e..5ffa3a23 100644 --- a/docs/content/docs/features/constrained_grammars.md +++ b/docs/content/docs/features/constrained_grammars.md @@ -1,26 +1,27 @@ - +++ disableToc = false -title = "✍️ Constrained grammars" +title = "✍️ Constrained Grammars" weight = 15 url = "/features/constrained_grammars/" +++ -The chat endpoint accepts an additional `grammar` parameter which takes a [BNF defined grammar](https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form). +## Overview -This allows the LLM to constrain the output to a user-defined schema, allowing to generate `JSON`, `YAML`, and everything that can be defined with a BNF grammar. +The `chat` endpoint supports the `grammar` parameter, which allows users to specify a grammar in Backus-Naur Form (BNF). This feature enables the Large Language Model (LLM) to generate outputs adhering to a user-defined schema, such as `JSON`, `YAML`, or any other format that can be defined using BNF. For more details about BNF, see [Backus-Naur Form on Wikipedia](https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form). {{% alert note %}} -This feature works only with models compatible with the [llama.cpp](https://github.com/ggerganov/llama.cpp) backend (see also [Model compatibility]({{%relref "docs/reference/compatibility-table" %}})). For details on how it works, see the upstream PRs: https://github.com/ggerganov/llama.cpp/pull/1773, https://github.com/ggerganov/llama.cpp/pull/1887 +**Compatibility Notice:** This feature is only supported by models that use the [llama.cpp](https://github.com/ggerganov/llama.cpp) backend. For a complete list of compatible models, refer to the [Model Compatibility](docs/reference/compatibility-table) page. For technical details, see the related pull requests: [PR #1773](https://github.com/ggerganov/llama.cpp/pull/1773) and [PR #1887](https://github.com/ggerganov/llama.cpp/pull/1887). {{% /alert %}} ## Setup -Follow the setup instructions from the [LocalAI functions]({{%relref "docs/features/openai-functions" %}}) page. +To use this feature, follow the installation and setup instructions on the [LocalAI Functions](docs/features/openai-functions) page. Ensure that your local setup meets all the prerequisites specified for the llama.cpp backend. -## 💡 Usage example +## 💡 Usage Example -For example, to constrain the output to either `yes`, `no`: +The following example demonstrates how to use the `grammar` parameter to constrain the model's output to either "yes" or "no". This can be particularly useful in scenarios where the response format needs to be strictly controlled. + +### Example: Binary Response Constraint ```bash curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ @@ -29,3 +30,5 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso "grammar": "root ::= (\"yes\" | \"no\")" }' ``` + +In this example, the `grammar` parameter is set to a simple choice between "yes" and "no", ensuring that the model's response adheres strictly to one of these options regardless of the context. 
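+
+### Example: Digit-Only Response Constraint
+
+Building on the same pattern, the sketch below constrains the reply to a run of digits. The model name and prompt are placeholders; the grammar uses the same BNF dialect accepted by llama.cpp.
+
+```bash
+curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+  "model": "model-name",
+  "messages": [{"role": "user", "content": "How many planets are in the solar system? Reply with digits only."}],
+  "grammar": "root ::= [0-9]+"
+}'
+```
+
+Here `root ::= [0-9]+` accepts one or more digit characters and nothing else, so any surrounding prose is excluded from the completion.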
\ No newline at end of file From 3200a6655e1413bf82fb8c6cdd142f47fdf95125 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Serta=C3=A7=20=C3=96zercan?= <852750+sozercan@users.noreply.github.com> Date: Sun, 26 May 2024 00:56:06 -0700 Subject: [PATCH 12/80] fix: gpu fetch device info (#2403) * fix: gpu fetch device info Signed-off-by: Sertac Ozercan * use pciutils package Signed-off-by: Sertac Ozercan --------- Signed-off-by: Sertac Ozercan --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2dd092d6..15475ed1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -107,7 +107,7 @@ ENV BUILD_TYPE=${BUILD_TYPE} RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ apt-get update && \ apt-get install -y --no-install-recommends \ - software-properties-common && \ + software-properties-common pciutils && \ curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \ dpkg -i cuda-keyring_1.1-1_all.deb && \ rm -f cuda-keyring_1.1-1_all.deb && \ @@ -355,7 +355,7 @@ RUN mkdir -p /build/models # Define the health check command HEALTHCHECK --interval=1m --timeout=10m --retries=10 \ CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1 - + VOLUME /build/models EXPOSE 8080 ENTRYPOINT [ "/build/entrypoint.sh" ] From 480834f75b5c66a31cb72081e966a20a244ff634 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 26 May 2024 10:05:15 +0200 Subject: [PATCH 13/80] :arrow_up: Update ggerganov/whisper.cpp (#2408) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ee58dcbe..58b65f88 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=22d46b7ba4620e2db1281e210d0186863cffcec0 +WHISPER_CPP_VERSION?=a7dc2aab16822b80a6491b0bd4bbf4900404a8a0 # bert.cpp version BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4 From 593fb62bf0eba5e73f4b5a957f9ce4aef95ea773 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 26 May 2024 10:43:50 +0200 Subject: [PATCH 14/80] :arrow_up: Update ggerganov/llama.cpp (#2409) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 58b65f88..0ef42c61 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=d041d2ceaaf50e058622d92921b3e680ffa4e9e7 +CPPLLAMA_VERSION?=9588f196b1d7b21bdff013fcf958c249576b2619 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From db3113c5c831d3b9ab35b97981e83c2c7084893b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 26 May 2024 14:48:30 +0200 Subject: [PATCH 15/80] fix(watcher): do not emit fatal errors (#2410) Signed-off-by: Ettore Di Giacinto --- core/startup/config_file_watcher.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/startup/config_file_watcher.go b/core/startup/config_file_watcher.go index 259446f1..6a2bdca1 100644 --- 
a/core/startup/config_file_watcher.go +++ b/core/startup/config_file_watcher.go @@ -71,8 +71,7 @@ func (c *configFileHandler) Watch() error { configWatcher, err := fsnotify.NewWatcher() c.watcher = configWatcher if err != nil { - log.Fatal().Err(err).Str("configdir", c.appConfig.DynamicConfigsDir).Msg("unable to create a watcher for configuration directory") - + return err } if c.appConfig.DynamicConfigsDirPollInterval > 0 { From 3280de7adf23757a30c91058503a8c224f576fd2 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 26 May 2024 15:43:31 +0200 Subject: [PATCH 16/80] models(gallery): add Mahou (#2411) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index b43aced1..d7723b41 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -817,6 +817,26 @@ - filename: Llama-3-8B-Instruct-DPO-v0.3.Q4_K_M.gguf sha256: 694c55b5215d03e59626cd4292076eaf31610ef27ba04737166766baa75d889f uri: huggingface://MaziyarPanahi/Llama-3-8B-Instruct-DPO-v0.3-32k-GGUF/Llama-3-8B-Instruct-DPO-v0.3.Q4_K_M.gguf +- url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + name: "mahou-1.2-llama3-8b" + license: llama3 + icon: https://huggingface.co/flammenai/Mahou-1.0-mistral-7B/resolve/main/mahou1.png + urls: + - https://huggingface.co/flammenai/Mahou-1.2-llama3-8B-GGUF + tags: + - llm + - gguf + - gpu + - cpu + - llama3 + overrides: + context_size: 8192 + parameters: + model: Mahou-1.2-llama3-8B-Q4_K_M.gguf + files: + - filename: Mahou-1.2-llama3-8B-Q4_K_M.gguf + sha256: 651b405dff71e4ce80e15cc6d393463f02833428535c56eb6bae113776775d62 + uri: huggingface://flammenai/Mahou-1.2-llama3-8B-GGUF/Mahou-1.2-llama3-8B-Q4_K_M.gguf - &yi-chat ### Start Yi url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From 135208806c7a2f7d886a3d1626bc3b6942d428e4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 26 May 2024 15:58:19 +0200 Subject: [PATCH 17/80] models(gallery): add minicpm (#2412) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index d7723b41..760a1902 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1158,6 +1158,31 @@ - filename: llava-llama-3-8b-v1_1-mmproj-f16.gguf sha256: eb569aba7d65cf3da1d0369610eb6869f4a53ee369992a804d5810a80e9fa035 uri: huggingface://xtuner/llava-llama-3-8b-v1_1-gguf/llava-llama-3-8b-v1_1-mmproj-f16.gguf +- !!merge <<: *llama3 + name: "minicpm-llama3-v-2_5" + urls: + - https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf + - https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5 + description: | + MiniCPM-Llama3-V 2.5 is the latest model in the MiniCPM-V series. 
The model is built on SigLip-400M and Llama3-8B-Instruct with a total of 8B parameters + tags: + - llm + - multimodal + - gguf + - gpu + - llama3 + - cpu + overrides: + mmproj: minicpm-llama3-mmproj-f16.gguf + parameters: + model: minicpm-llama3-Q4_K_M.gguf + files: + - filename: minicpm-llama3-Q4_K_M.gguf + sha256: 010ec3ba94cb5ad2d9c8f95f46f01c6d80f83deab9df0a0831334ea45afff3e2 + uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/minicpm-llama3-Q4_K_M.gguf + - filename: minicpm-llama3-mmproj-f16.gguf + sha256: 391d11736c3cd24a90417c47b0c88975e86918fcddb1b00494c4d715b08af13e + uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/mmproj-model-f16.gguf ### ChatML - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "helpingai-9b" From 6343758f9c93c842c69dd2db6ce48412dc559ca4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 26 May 2024 19:59:49 +0200 Subject: [PATCH 18/80] models(gallery): add poppy porpoise 0.85 (#2415) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 760a1902..8f57ae28 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1106,6 +1106,34 @@ - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf +- !!merge <<: *llama3 + name: "poppy_porpoise-v0.85-l3-8b-iq-imatrix" + urls: + - https://huggingface.co/Lewdiculous/Poppy_Porpoise-0.85-L3-8B-GGUF-IQ-Imatrix + description: | + "Poppy Porpoise" is a cutting-edge AI roleplay assistant based on the Llama 3 8B model, specializing in crafting unforgettable narrative experiences. With its advanced language capabilities, Poppy expertly immerses users in an interactive and engaging adventure, tailoring each adventure to their individual preferences. + + Update: Vision/multimodal capabilities again! 
+ icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/Boje781GkTdYgORTYGI6r.png + tags: + - llm + - multimodal + - gguf + - gpu + - llama3 + - cpu + - llava-1.5 + overrides: + mmproj: Llama-3-Update-2.0-mmproj-model-f16.gguf + parameters: + model: Poppy_Porpoise-0.85-L3-8B-Q4_K_M-imat.gguf + files: + - filename: Poppy_Porpoise-0.85-L3-8B-Q4_K_M-imat.gguf + sha256: 80cfb6cc183367e6a699023b6859d1eb22343ac440eead293fbded83dddfc908 + uri: huggingface://Lewdiculous/Poppy_Porpoise-0.85-L3-8B-GGUF-IQ-Imatrix/Poppy_Porpoise-0.85-L3-8B-Q4_K_M-imat.gguf + - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf + sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba + uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf - !!merge <<: *llama3 name: "bunny-llama-3-8b-v" urls: From 345047ed7c674999d06acaeb33e607ae9f269b33 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 26 May 2024 20:04:26 +0200 Subject: [PATCH 19/80] models(gallery): add alpha centauri (#2416) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 8f57ae28..bcb38ad2 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1062,6 +1062,33 @@ - filename: LLaMAntino-3-ANITA-8B-Inst-DPO-ITA.Q4_K_M.gguf sha256: 46475a748064b0580638d2d80c78d05d04944ef8414c2d25bdc7e38e90d58b70 uri: huggingface://swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA_GGUF/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA.Q4_K_M.gguf +- !!merge <<: *llama3 + name: "llama-3-alpha-centauri-v0.1" + urls: + - https://huggingface.co/fearlessdots/Llama-3-Alpha-Centauri-v0.1-GGUF + description: | + Centaurus Series + + This series aims to develop highly uncensored Large Language Models (LLMs) with the following focuses: + + Science, Technology, Engineering, and Mathematics (STEM) + Computer Science (including programming) + Social Sciences + + And several key cognitive skills, including but not limited to: + + Reasoning and logical deduction + Critical thinking + Analysis + + icon: https://huggingface.co/fearlessdots/Llama-3-Alpha-Centauri-v0.1-GGUF/resolve/main/alpha_centauri_banner.png + overrides: + parameters: + model: Llama-3-Alpha-Centauri-v0.1.Q4_K_M.gguf + files: + - filename: Llama-3-Alpha-Centauri-v0.1.Q4_K_M.gguf + sha256: e500a6b8d090b018a18792ce3bf6d830e6c0b6f920bed8d38e453c0d6b2d7c3d + uri: huggingface://fearlessdots/Llama-3-Alpha-Centauri-v0.1-GGUF/Llama-3-Alpha-Centauri-v0.1.Q4_K_M.gguf - !!merge <<: *llama3 name: "aurora_l3_8b-iq-imatrix" urls: From 16433d2e8e0d6f0346d6d872f94b6a53b2e6cc33 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Sun, 26 May 2024 13:05:52 -0500 Subject: [PATCH 20/80] fix: install pytorch from proper index for hipblas builds (#2413) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- backend/python/autogptq/requirements-hipblas.txt | 2 ++ backend/python/bark/requirements-hipblas.txt | 3 +++ backend/python/common/template/requirements-hipblas.txt | 2 ++ backend/python/coqui/requirements-hipblas.txt | 3 +++ backend/python/diffusers/requirements-hipblas.txt | 3 +++ backend/python/openvoice/requirements-hipblas.txt | 2 ++ backend/python/parler-tts/requirements-hipblas.txt | 3 +++ backend/python/petals/requirements-hipblas.txt | 2 ++ backend/python/rerankers/requirements-hipblas.txt | 2 ++ backend/python/sentencetransformers/requirements-hipblas.txt | 2 ++ 
backend/python/transformers-musicgen/requirements-hipblas.txt | 2 ++ backend/python/transformers/requirements-hipblas.txt | 2 ++ backend/python/vall-e-x/requirements-hipblas.txt | 3 +++ backend/python/vllm/requirements-hipblas.txt | 2 ++ 14 files changed, 33 insertions(+) create mode 100644 backend/python/autogptq/requirements-hipblas.txt create mode 100644 backend/python/bark/requirements-hipblas.txt create mode 100644 backend/python/common/template/requirements-hipblas.txt create mode 100644 backend/python/coqui/requirements-hipblas.txt create mode 100644 backend/python/diffusers/requirements-hipblas.txt create mode 100644 backend/python/openvoice/requirements-hipblas.txt create mode 100644 backend/python/parler-tts/requirements-hipblas.txt create mode 100644 backend/python/petals/requirements-hipblas.txt create mode 100644 backend/python/rerankers/requirements-hipblas.txt create mode 100644 backend/python/sentencetransformers/requirements-hipblas.txt create mode 100644 backend/python/transformers-musicgen/requirements-hipblas.txt create mode 100644 backend/python/transformers/requirements-hipblas.txt create mode 100644 backend/python/vall-e-x/requirements-hipblas.txt create mode 100644 backend/python/vllm/requirements-hipblas.txt diff --git a/backend/python/autogptq/requirements-hipblas.txt b/backend/python/autogptq/requirements-hipblas.txt new file mode 100644 index 00000000..76018445 --- /dev/null +++ b/backend/python/autogptq/requirements-hipblas.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch \ No newline at end of file diff --git a/backend/python/bark/requirements-hipblas.txt b/backend/python/bark/requirements-hipblas.txt new file mode 100644 index 00000000..7bfc411b --- /dev/null +++ b/backend/python/bark/requirements-hipblas.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch +torchaudio \ No newline at end of file diff --git a/backend/python/common/template/requirements-hipblas.txt b/backend/python/common/template/requirements-hipblas.txt new file mode 100644 index 00000000..76018445 --- /dev/null +++ b/backend/python/common/template/requirements-hipblas.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch \ No newline at end of file diff --git a/backend/python/coqui/requirements-hipblas.txt b/backend/python/coqui/requirements-hipblas.txt new file mode 100644 index 00000000..7bfc411b --- /dev/null +++ b/backend/python/coqui/requirements-hipblas.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch +torchaudio \ No newline at end of file diff --git a/backend/python/diffusers/requirements-hipblas.txt b/backend/python/diffusers/requirements-hipblas.txt new file mode 100644 index 00000000..6c8da20d --- /dev/null +++ b/backend/python/diffusers/requirements-hipblas.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch +torchvision \ No newline at end of file diff --git a/backend/python/openvoice/requirements-hipblas.txt b/backend/python/openvoice/requirements-hipblas.txt new file mode 100644 index 00000000..76018445 --- /dev/null +++ b/backend/python/openvoice/requirements-hipblas.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-hipblas.txt b/backend/python/parler-tts/requirements-hipblas.txt new file mode 100644 index 00000000..7bfc411b --- /dev/null +++ 
b/backend/python/parler-tts/requirements-hipblas.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch +torchaudio \ No newline at end of file diff --git a/backend/python/petals/requirements-hipblas.txt b/backend/python/petals/requirements-hipblas.txt new file mode 100644 index 00000000..0331f106 --- /dev/null +++ b/backend/python/petals/requirements-hipblas.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch diff --git a/backend/python/rerankers/requirements-hipblas.txt b/backend/python/rerankers/requirements-hipblas.txt new file mode 100644 index 00000000..76018445 --- /dev/null +++ b/backend/python/rerankers/requirements-hipblas.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-hipblas.txt b/backend/python/sentencetransformers/requirements-hipblas.txt new file mode 100644 index 00000000..76018445 --- /dev/null +++ b/backend/python/sentencetransformers/requirements-hipblas.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-hipblas.txt b/backend/python/transformers-musicgen/requirements-hipblas.txt new file mode 100644 index 00000000..76018445 --- /dev/null +++ b/backend/python/transformers-musicgen/requirements-hipblas.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch \ No newline at end of file diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt new file mode 100644 index 00000000..76018445 --- /dev/null +++ b/backend/python/transformers/requirements-hipblas.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-hipblas.txt b/backend/python/vall-e-x/requirements-hipblas.txt new file mode 100644 index 00000000..7bfc411b --- /dev/null +++ b/backend/python/vall-e-x/requirements-hipblas.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch +torchaudio \ No newline at end of file diff --git a/backend/python/vllm/requirements-hipblas.txt b/backend/python/vllm/requirements-hipblas.txt new file mode 100644 index 00000000..76018445 --- /dev/null +++ b/backend/python/vllm/requirements-hipblas.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch \ No newline at end of file From 2c8205854872ad4970f3cf26236c90a3d8a6212e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 26 May 2024 20:11:57 +0200 Subject: [PATCH 21/80] models(gallery): add cream-phi-13b (#2417) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ gallery/phi-3-chat.yaml | 1 + 2 files changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index bcb38ad2..2ae17922 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1457,6 +1457,20 @@ - filename: "Phi-3-medium-4k-instruct-Q4_K_M.gguf" uri: "huggingface://bartowski/Phi-3-medium-4k-instruct-GGUF/Phi-3-medium-4k-instruct-Q4_K_M.gguf" sha256: 4e8d4258ed44562573c8984a045b0a4651c51e7e4d9d00a06c65cd2149ab4539 +- !!merge <<: *phi-3 + name: "cream-phi-3-14b-v1" + icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/AP4-OHepdqiqHj2KSi26M.gif + description: | + CreamPhi 14B is the first Phi Medium to be 
trained with roleplay and moist. + urls: + - https://huggingface.co/TheDrummer/Cream-Phi-3-14B-v1-GGUF + overrides: + parameters: + model: Cream-Phi-3-14B-v1-Q4_K_M.gguf + files: + - filename: Cream-Phi-3-14B-v1-Q4_K_M.gguf + uri: huggingface://TheDrummer/Cream-Phi-3-14B-v1-GGUF/Cream-Phi-3-14B-v1-Q4_K_M.gguf + sha256: ec67018a86090da415517acf21ad48f28e02dff664a1dd35602f1f8fa94f6a27 - &hermes-2-pro-mistral ### START Hermes url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" diff --git a/gallery/phi-3-chat.yaml b/gallery/phi-3-chat.yaml index ede4fd0f..98a3f385 100644 --- a/gallery/phi-3-chat.yaml +++ b/gallery/phi-3-chat.yaml @@ -16,3 +16,4 @@ config_file: | f16: true stopwords: - <|end|> + - <|endoftext|> From ff1f9125ed1e391b33dbffe75df56cbca9d17a75 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 26 May 2024 20:12:40 +0200 Subject: [PATCH 22/80] models(gallery): add stheno-mahou (#2418) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 2ae17922..519d23ed 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -311,6 +311,20 @@ - filename: l3-8b-stheno-v3.1.Q4_K_M.gguf sha256: f166fb8b7fd1de6638fcf8e3561c99292f0c37debe1132325aa583eef78f1b40 uri: huggingface://mudler/L3-8B-Stheno-v3.1-Q4_K_M-GGUF/l3-8b-stheno-v3.1.Q4_K_M.gguf +- !!merge <<: *llama3 + name: "llama-3-stheno-mahou-8b" + urls: + - https://huggingface.co/mudler/llama-3-Stheno-Mahou-8B-Q4_K_M-GGUF + - https://huggingface.co/nbeerbower/llama-3-Stheno-Mahou-8B + description: | + This model was merged using the Model Stock merge method using flammenai/Mahou-1.2-llama3-8B as a base. + overrides: + parameters: + model: llama-3-stheno-mahou-8b-q4_k_m.gguf + files: + - filename: llama-3-stheno-mahou-8b-q4_k_m.gguf + sha256: a485cd74ef4ff3671c67ed8e10ea5379a1f24082ac688bd303fd28dfc9808c11 + uri: huggingface://mudler/llama-3-Stheno-Mahou-8B-Q4_K_M-GGUF/llama-3-stheno-mahou-8b-q4_k_m.gguf - !!merge <<: *llama3 name: "llama-3-8b-openhermes-dpo" urls: From ba984c70975f0c08b6f5b4b797a46c4ea6697562 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Sun, 26 May 2024 13:27:07 -0500 Subject: [PATCH 23/80] fix: pin version of setuptools for intel builds to work around #2406 (#2414) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- backend/python/autogptq/requirements-intel.txt | 2 +- backend/python/bark/requirements-intel.txt | 2 +- backend/python/coqui/requirements-intel.txt | 2 +- backend/python/diffusers/requirements-intel.txt | 2 +- backend/python/parler-tts/requirements-intel.txt | 2 +- backend/python/petals/requirements-intel.txt | 2 +- backend/python/rerankers/requirements-intel.txt | 2 +- backend/python/sentencetransformers/requirements-intel.txt | 2 +- backend/python/transformers-musicgen/requirements-intel.txt | 2 +- backend/python/transformers/requirements-intel.txt | 2 +- backend/python/vall-e-x/requirements-intel.txt | 2 +- backend/python/vllm/requirements-intel.txt | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/backend/python/autogptq/requirements-intel.txt b/backend/python/autogptq/requirements-intel.txt index cec8bff4..95d4848c 100644 --- a/backend/python/autogptq/requirements-intel.txt +++ b/backend/python/autogptq/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # 
https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/bark/requirements-intel.txt b/backend/python/bark/requirements-intel.txt index 54b3900d..e6b4afc0 100644 --- a/backend/python/bark/requirements-intel.txt +++ b/backend/python/bark/requirements-intel.txt @@ -3,4 +3,4 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/coqui/requirements-intel.txt b/backend/python/coqui/requirements-intel.txt index 54b3900d..e6b4afc0 100644 --- a/backend/python/coqui/requirements-intel.txt +++ b/backend/python/coqui/requirements-intel.txt @@ -3,4 +3,4 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/diffusers/requirements-intel.txt b/backend/python/diffusers/requirements-intel.txt index 7d048246..3637b322 100644 --- a/backend/python/diffusers/requirements-intel.txt +++ b/backend/python/diffusers/requirements-intel.txt @@ -3,4 +3,4 @@ intel-extension-for-pytorch torch torchvision optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-intel.txt b/backend/python/parler-tts/requirements-intel.txt index 54b3900d..e6b4afc0 100644 --- a/backend/python/parler-tts/requirements-intel.txt +++ b/backend/python/parler-tts/requirements-intel.txt @@ -3,4 +3,4 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/petals/requirements-intel.txt b/backend/python/petals/requirements-intel.txt index cec8bff4..95d4848c 100644 --- a/backend/python/petals/requirements-intel.txt +++ b/backend/python/petals/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/rerankers/requirements-intel.txt b/backend/python/rerankers/requirements-intel.txt index cec8bff4..95d4848c 100644 --- a/backend/python/rerankers/requirements-intel.txt +++ b/backend/python/rerankers/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-intel.txt b/backend/python/sentencetransformers/requirements-intel.txt index cec8bff4..95d4848c 100644 --- a/backend/python/sentencetransformers/requirements-intel.txt +++ b/backend/python/sentencetransformers/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-intel.txt b/backend/python/transformers-musicgen/requirements-intel.txt index cec8bff4..95d4848c 100644 --- a/backend/python/transformers-musicgen/requirements-intel.txt +++ 
b/backend/python/transformers-musicgen/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/transformers/requirements-intel.txt b/backend/python/transformers/requirements-intel.txt index cec8bff4..95d4848c 100644 --- a/backend/python/transformers/requirements-intel.txt +++ b/backend/python/transformers/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-intel.txt b/backend/python/vall-e-x/requirements-intel.txt index 54b3900d..e6b4afc0 100644 --- a/backend/python/vall-e-x/requirements-intel.txt +++ b/backend/python/vall-e-x/requirements-intel.txt @@ -3,4 +3,4 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/vllm/requirements-intel.txt b/backend/python/vllm/requirements-intel.txt index cec8bff4..95d4848c 100644 --- a/backend/python/vllm/requirements-intel.txt +++ b/backend/python/vllm/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file From e9c28a1ed7eef43ac5266029de5d9b3033c0103c Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 26 May 2024 23:32:05 +0200 Subject: [PATCH 24/80] :arrow_up: Update ggerganov/llama.cpp (#2419) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0ef42c61..e79a3c9c 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=9588f196b1d7b21bdff013fcf958c249576b2619 +CPPLLAMA_VERSION?=dff451cfa1f297348751ce6b538670e1ae9a7d5b # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From eaf653f3d300244970aade30be03d2e0c8ced346 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 27 May 2024 17:17:04 +0200 Subject: [PATCH 25/80] models(gallery): add iterative-dpo, fix minicpm (#2422) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 519d23ed..688312bc 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -779,6 +779,21 @@ - filename: Tess-2.0-Llama-3-8B-Q4_K_M.gguf sha256: 3b5fbd6c59d7d38205ab81970c0227c74693eb480acf20d8c2f211f62e3ca5f6 uri: huggingface://bartowski/Tess-2.0-Llama-3-8B-GGUF/Tess-2.0-Llama-3-8B-Q4_K_M.gguf +- !!merge <<: *llama3 + name: "llama3-iterative-dpo-final" + urls: + - https://huggingface.co/bartowski/LLaMA3-iterative-DPO-final-GGUF + - https://huggingface.co/RLHFlow/LLaMA3-iterative-DPO-final + description: | + From model card: + We release an unofficial checkpoint of a state-of-the-art instruct model of its class, 
LLaMA3-iterative-DPO-final. On all three widely-used instruct model benchmarks: Alpaca-Eval-V2, MT-Bench, Chat-Arena-Hard, our model outperforms all models of similar size (e.g., LLaMA-3-8B-it), most large open-sourced models (e.g., Mixtral-8x7B-it), and strong proprietary models (e.g., GPT-3.5-turbo-0613). The model is trained with open-sourced datasets without any additional human-/GPT4-labeling. + overrides: + parameters: + model: LLaMA3-iterative-DPO-final-Q4_K_M.gguf + files: + - filename: LLaMA3-iterative-DPO-final-Q4_K_M.gguf + sha256: 480703ff85af337e1db2a9d9a678a3ac8ca0802e366b14d9c59b81d3fc689da8 + uri: huggingface://bartowski/LLaMA3-iterative-DPO-final-GGUF/LLaMA3-iterative-DPO-final-Q4_K_M.gguf - &dolphin name: "dolphin-2.9-llama3-8b" url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" @@ -1248,7 +1263,7 @@ files: - filename: minicpm-llama3-Q4_K_M.gguf sha256: 010ec3ba94cb5ad2d9c8f95f46f01c6d80f83deab9df0a0831334ea45afff3e2 - uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/minicpm-llama3-Q4_K_M.gguf + uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/ggml-model-Q4_K_M.gguf - filename: minicpm-llama3-mmproj-f16.gguf sha256: 391d11736c3cd24a90417c47b0c88975e86918fcddb1b00494c4d715b08af13e uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/mmproj-model-f16.gguf From be8ffbdfcfbf4d7a848ce670e94f37858ad788ca Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 27 May 2024 17:23:34 +0200 Subject: [PATCH 26/80] ci(grpc-cache): also arm64 (#2423) grpc-cache: also arm64 Signed-off-by: Ettore Di Giacinto --- .github/workflows/generate_grpc_cache.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/generate_grpc_cache.yaml b/.github/workflows/generate_grpc_cache.yaml index 3d80b967..fa5ccf20 100644 --- a/.github/workflows/generate_grpc_cache.yaml +++ b/.github/workflows/generate_grpc_cache.yaml @@ -17,7 +17,7 @@ jobs: include: - grpc-base-image: ubuntu:22.04 runs-on: 'ubuntu-latest' - platforms: 'linux/amd64' + platforms: 'linux/amd64,linux/arm64' runs-on: ${{matrix.runs-on}} steps: - name: Release space from worker From d075dc44ddf876f764912f450ad3c198075ec4b1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 27 May 2024 22:07:35 +0200 Subject: [PATCH 27/80] ci: push test images when building PRs (#2424) ci: try to push image Signed-off-by: Ettore Di Giacinto --- .github/workflows/image_build.yml | 51 +++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 167a8fef..96cd5992 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -136,6 +136,7 @@ jobs: - name: Docker meta id: meta + if: github.event_name != 'pull_request' uses: docker/metadata-action@v5 with: images: | @@ -148,7 +149,20 @@ jobs: flavor: | latest=${{ inputs.tag-latest }} suffix=${{ inputs.tag-suffix }} - + - name: Docker meta for PR + id: meta_pull_request + if: github.event_name == 'pull_request' + uses: docker/metadata-action@v5 + with: + images: | + ttl.sh/localai-ci-pr-${{ github.event.number }} + tags: | + type=ref,event=branch + type=semver,pattern={{raw}} + type=sha + flavor: | + latest=${{ inputs.tag-latest }} + suffix=${{ inputs.tag-suffix }} - name: Docker meta AIO (quay.io) if: inputs.aio != '' id: meta_aio @@ -202,6 +216,7 @@ jobs: - name: Build and push uses: docker/build-push-action@v5 + if: github.event_name != 'pull_request' with: builder: ${{ steps.buildx.outputs.name }} # The 
build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache. @@ -226,7 +241,39 @@ jobs: push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - +### Start testing image + - name: Build and push + uses: docker/build-push-action@v5 + if: github.event_name == 'pull_request' + with: + builder: ${{ steps.buildx.outputs.name }} + # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache. + # This means that even the MAKEFLAGS have to be an EXACT match. + # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch. + # This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded + build-args: | + BUILD_TYPE=${{ inputs.build-type }} + CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }} + CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }} + FFMPEG=${{ inputs.ffmpeg }} + IMAGE_TYPE=${{ inputs.image-type }} + BASE_IMAGE=${{ inputs.base-image }} + GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }} + GRPC_MAKEFLAGS=--jobs=4 --output-sync=target + GRPC_VERSION=v1.64.0 + MAKEFLAGS=${{ inputs.makeflags }} + context: . + file: ./Dockerfile + cache-from: type=gha + platforms: ${{ inputs.platforms }} + push: true + tags: ${{ steps.meta_pull_request.outputs.tags }} + labels: ${{ steps.meta_pull_request.outputs.labels }} + - name: Testing image + if: github.event_name == 'pull_request' + run: | + echo "Image is available at ttl.sh/localai-ci-pr-${{ github.event.number }}:${{ steps.meta_pull_request.outputs.version }}" >> $GITHUB_STEP_SUMMARY +## End testing image - name: Build and push AIO image if: inputs.aio != '' uses: docker/build-push-action@v5 From 9f5c274321c0b76ed543106a7e110e3085278919 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 27 May 2024 22:07:48 +0200 Subject: [PATCH 28/80] feat(images): do not install python deps in the core image (#2425) do not install python deps in the core image Signed-off-by: Ettore Di Giacinto --- Dockerfile | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index 15475ed1..e9653f55 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,12 +24,9 @@ RUN apt-get update && \ cmake \ curl \ git \ - python3-pip \ - python-is-python3 \ unzip && \ apt-get clean && \ - rm -rf /var/lib/apt/lists/* && \ - pip install --upgrade pip + rm -rf /var/lib/apt/lists/* # Install Go RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz @@ -39,9 +36,6 @@ ENV PATH $PATH:/root/go/bin:/usr/local/go/bin RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest && \ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest -# Install grpcio-tools (the version in 22.04 is too old) -RUN pip install --user grpcio-tools - COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/ RUN update-ca-certificates @@ -85,10 +79,16 @@ RUN apt-get update && \ apt-get install -y --no-install-recommends \ espeak-ng \ espeak \ + python3-pip \ + python-is-python3 \ python3-dev \ python3-venv && \ apt-get clean && \ - rm -rf /var/lib/apt/lists/* + rm -rf /var/lib/apt/lists/* && \ + pip install --upgrade pip + +# Install grpcio-tools (the version in 22.04 is too old) +RUN pip install --user grpcio-tools ################################### ################################### From 
10430a00bda3511a0141122d9a2cc04649cce2a7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 27 May 2024 22:35:11 +0200 Subject: [PATCH 29/80] feat(hipblas): extend default hipblas GPU_TARGETS (#2426) Makefile: extend default hipblas GPU_TARGETS Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e79a3c9c..403550d6 100644 --- a/Makefile +++ b/Makefile @@ -112,7 +112,7 @@ ifeq ($(BUILD_TYPE),hipblas) # llama-ggml has no hipblas support, so override it here. export STABLE_BUILD_TYPE= export WHISPER_HIPBLAS=1 - GPU_TARGETS ?= gfx900,gfx90a,gfx1030,gfx1031,gfx1100 + GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101 AMDGPU_TARGETS ?= "$(GPU_TARGETS)" CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)" CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib From 1c80f628ffd2ee8a538327b413a1c59ff42af7e5 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 27 May 2024 23:28:36 +0200 Subject: [PATCH 30/80] :arrow_up: Update ggerganov/whisper.cpp (#2427) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 403550d6..fc158134 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=a7dc2aab16822b80a6491b0bd4bbf4900404a8a0 +WHISPER_CPP_VERSION?=c7b6988678779901d02ceba1a8212d2c9908956e # bert.cpp version BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4 From 577888f3c07fd42cd52eb9c775693eff09e4a35c Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 28 May 2024 00:02:49 +0200 Subject: [PATCH 31/80] :arrow_up: Update ggerganov/llama.cpp (#2428) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fc158134..843407f4 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=dff451cfa1f297348751ce6b538670e1ae9a7d5b +CPPLLAMA_VERSION?=10b1e4587670feba2c7730a645accf8234873113 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 2bbc52fcc8672d142874ab8b44c88e587c935c17 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 28 May 2024 10:34:59 +0200 Subject: [PATCH 32/80] feat(build): add arm64 core containers (#2421) ci: add arm64 container images Signed-off-by: Ettore Di Giacinto --- .github/workflows/image.yml | 2 +- Dockerfile | 40 ++++++++++++++++++++++++++++++++++--- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 6ce90b1f..15b2693c 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -260,7 +260,7 @@ jobs: matrix: include: - build-type: '' - platforms: 'linux/amd64' + platforms: 'linux/amd64,linux/arm64' tag-latest: 'auto' tag-suffix: 
'-ffmpeg-core' ffmpeg: 'true' diff --git a/Dockerfile b/Dockerfile index e9653f55..74e97934 100644 --- a/Dockerfile +++ b/Dockerfile @@ -104,6 +104,31 @@ ARG CUDA_MINOR_VERSION=7 ENV BUILD_TYPE=${BUILD_TYPE} # CuBLAS requirements +RUN < Date: Tue, 28 May 2024 21:06:09 +0200 Subject: [PATCH 33/80] feat(functions): allow parallel calls with mixed/no grammars (#2432) Signed-off-by: Ettore Di Giacinto --- core/http/endpoints/openai/chat.go | 10 ++- pkg/functions/parse.go | 136 ++++++++++++++++++----------- pkg/functions/parse_test.go | 43 +++++++++ 3 files changed, 134 insertions(+), 55 deletions(-) diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 341dc34b..b2e7aa75 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -25,7 +25,7 @@ import ( // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/chat/completions [post] func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error { - emptyMessage := "" + textContentToReturn := "" id := uuid.New().String() created := int(time.Now().Unix()) @@ -34,7 +34,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup ID: id, Created: created, Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}}, + Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &textContentToReturn}}}, Object: "chat.completion.chunk", } responses <- initialMessage @@ -69,6 +69,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup result = functions.CleanupLLMResult(result, config.FunctionsConfig) results := functions.ParseFunctionCall(result, config.FunctionsConfig) + textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig) noActionToRun := len(results) > 0 && results[0].Name == noAction || len(results) == 0 switch { @@ -77,7 +78,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup ID: id, Created: created, Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. 
- Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}}, + Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &textContentToReturn}}}, Object: "chat.completion.chunk", } responses <- initialMessage @@ -449,7 +450,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup { FinishReason: finishReason, Index: 0, - Delta: &schema.Message{Content: &emptyMessage}, + Delta: &schema.Message{Content: &textContentToReturn}, }}, Object: "chat.completion.chunk", Usage: *usage, @@ -473,6 +474,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup s = functions.CleanupLLMResult(s, config.FunctionsConfig) results := functions.ParseFunctionCall(s, config.FunctionsConfig) + textContentToReturn = functions.ParseTextContent(s, config.FunctionsConfig) noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0 switch { diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go index d6e9d320..7bb3e6bd 100644 --- a/pkg/functions/parse.go +++ b/pkg/functions/parse.go @@ -3,6 +3,7 @@ package functions import ( "encoding/json" "regexp" + "strings" "github.com/go-skynet/LocalAI/pkg/utils" "github.com/rs/zerolog/log" @@ -59,6 +60,11 @@ type FunctionsConfig struct { // ReplaceLLMResult allow to replace strings in the results before parsing them ReplaceLLMResult []ReplaceResult `yaml:"replace_llm_results"` + // CaptureLLMResult is a regex to extract a string from the LLM response + // that is used as return string when using tools. + // This is useful for e.g. if the LLM outputs a reasoning and we want to get the reasoning as a string back + CaptureLLMResult []string `yaml:"capture_llm_results"` + // FunctionName enable the LLM to return { "name": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } } // instead of { "function": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }. // This might be useful for certain models trained with the function name as the first token. @@ -109,6 +115,20 @@ func CleanupLLMResult(llmresult string, functionConfig FunctionsConfig) string { return llmresult } +func ParseTextContent(llmresult string, functionConfig FunctionsConfig) string { + for _, r := range functionConfig.CaptureLLMResult { + // We use a regex to extract the JSON object from the response + var respRegex = regexp.MustCompile(r) + match := respRegex.FindStringSubmatch(llmresult) + if len(match) >= 1 { + m := strings.TrimSpace(match[1]) + return m + } + } + + return "" +} + func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncCallResults { log.Debug().Msgf("LLM result: %s", llmresult) @@ -127,47 +147,52 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC } results := []FuncCallResults{} + llmResults := []string{} - returnResult := func(s string) (result []FuncCallResults, e error) { + returnResult := func(results []string) (result []FuncCallResults, e error) { // As we have to change the result before processing, we can't stream the answer token-by-token (yet?) 
- var ss []map[string]interface{} result = make([]FuncCallResults, 0) - s = utils.EscapeNewLines(s) - err := json.Unmarshal([]byte(s), &ss) - if err != nil { - // If the LLM result is a single object, try unmarshaling it into a single map - var singleObj map[string]interface{} - err = json.Unmarshal([]byte(s), &singleObj) + + for _, s := range results { + var ss []map[string]interface{} + + s = utils.EscapeNewLines(s) + err := json.Unmarshal([]byte(s), &ss) if err != nil { - log.Debug().Err(err).Str("escapedLLMResult", s).Msg("unable to unmarshal llm result in a single object or an array of JSON objects") - } else { - ss = []map[string]interface{}{singleObj} - } - } - - log.Debug().Msgf("Function return: %s %+v", s, ss) - - for _, s := range ss { - // The grammar defines the function name as "function", while OpenAI returns "name" - func_name, ok := s[functionNameKey] - if !ok { - continue - //return result, fmt.Errorf("unable to find function name in result") - } - // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object - args, ok := s["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix) - if !ok { - continue - //return result, fmt.Errorf("unable to find arguments in result") - } - d, _ := json.Marshal(args) - funcName, ok := func_name.(string) - if !ok { - continue - //return result, fmt.Errorf("unable to cast function name to string") + // If the LLM result is a single object, try unmarshaling it into a single map + var singleObj map[string]interface{} + err = json.Unmarshal([]byte(s), &singleObj) + if err != nil { + log.Debug().Err(err).Str("escapedLLMResult", s).Msg("unable to unmarshal llm result in a single object or an array of JSON objects") + } else { + ss = []map[string]interface{}{singleObj} + } } - result = append(result, FuncCallResults{Name: funcName, Arguments: string(d)}) + log.Debug().Msgf("Function return: %s %+v", s, ss) + + for _, s := range ss { + // The grammar defines the function name as "function", while OpenAI returns "name" + func_name, ok := s[functionNameKey] + if !ok { + continue + //return result, fmt.Errorf("unable to find function name in result") + } + // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object + args, ok := s["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix) + if !ok { + continue + //return result, fmt.Errorf("unable to find arguments in result") + } + d, _ := json.Marshal(args) + funcName, ok := func_name.(string) + if !ok { + continue + //return result, fmt.Errorf("unable to cast function name to string") + } + + result = append(result, FuncCallResults{Name: funcName, Arguments: string(d)}) + } } return result, nil @@ -179,10 +204,16 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC for _, r := range functionConfig.JSONRegexMatch { // We use a regex to extract the JSON object from the response var respRegex = regexp.MustCompile(r) - match := respRegex.FindStringSubmatch(llmresult) - if len(match) >= 2 { - llmresult = match[1] - log.Debug().Msgf("LLM result(JSONRegexMatch): %s", llmresult) + match := respRegex.FindAllStringSubmatch(llmresult, -1) + var allMatches []string + for _, m := range match { + if len(m) > 1 { + // we match the first group + allMatches = append(allMatches, m[1]) + } + } + if len(allMatches) > 0 { + llmResults = append(llmResults, allMatches...) 
break } } @@ -193,22 +224,25 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC // obviously, this expects the LLM to be stable and return correctly formatted JSON // TODO: optimize this and pre-compile it var respRegex = regexp.MustCompile(functionConfig.ResponseRegex) - match := respRegex.FindStringSubmatch(llmresult) - for i, name := range respRegex.SubexpNames() { - if i != 0 && name != "" && len(match) > i { - result[name] = match[i] + matches := respRegex.FindAllStringSubmatch(llmresult, -1) + for _, match := range matches { + for i, name := range respRegex.SubexpNames() { + if i != 0 && name != "" && len(match) > i { + result[name] = match[i] + } } - } - // TODO: open point about multiple results and/or mixed with chat messages - // This is not handled as for now, we only expect one function call per response - functionName := result[functionNameKey] - if functionName == "" { - return results + functionName := result[functionNameKey] + if functionName == "" { + return results + } + results = append(results, FuncCallResults{Name: result[functionNameKey], Arguments: result["arguments"]}) } - results = append(results, FuncCallResults{Name: result[functionNameKey], Arguments: result["arguments"]}) } else { - results, _ = returnResult(llmresult) + if len(llmResults) == 0 { + llmResults = append(llmResults, llmresult) + } + results, _ = returnResult(llmResults) } return results diff --git a/pkg/functions/parse_test.go b/pkg/functions/parse_test.go index 5e266c50..01d8469f 100644 --- a/pkg/functions/parse_test.go +++ b/pkg/functions/parse_test.go @@ -215,5 +215,48 @@ Some text after the JSON Expect(results[0].Name).To(Equal("\"add\"")) Expect(results[0].Arguments).To(Equal(`{"x":5,"y":"v\"value\"","z":"\"v\""}`)) }) + + It("should detect multiple functions call where the JSONRegexMatch is repeated", func() { + input := ` +Some text before the JSON +{"function": "add", "arguments": {"x": 5, "y": 3}} +{"function": "subtract", "arguments": {"x": 10, "y": 7}} +Some text after the JSON +` + functionConfig.JSONRegexMatch = []string{`(?s)(.*?)`} + + results := ParseFunctionCall(input, functionConfig) + Expect(results).To(HaveLen(2)) + Expect(results[0].Name).To(Equal("add")) + Expect(results[0].Arguments).To(Equal(`{"x":5,"y":3}`)) + Expect(results[1].Name).To(Equal("subtract")) + Expect(results[1].Arguments).To(Equal(`{"x":10,"y":7}`)) + }) + }) + Context("ParseTextContent", func() { + It("Can extract notes from the LLM result", func() { + input := ` + Some text before the JSON + +roses are red + + {"function": "subtract", "arguments": {"x": 10, "y": 7}} + Some text after the JSON + ` + functionConfig.CaptureLLMResult = []string{`(?s)(.*?)`} + results := ParseTextContent(input, functionConfig) + Expect(results).To(Equal("roses are red")) + }) + + It("Defaults to empty if doesn't catch any", func() { + input := ` + Some text before the JSON + {"function": "subtract", "arguments": {"x": 10, "y": 7}} + Some text after the JSON + ` + functionConfig.CaptureLLMResult = []string{`(?s)(.*?)`} + results := ParseTextContent(input, functionConfig) + Expect(results).To(Equal("")) + }) }) }) From 0b99be73b3aadb8220902679af58cf7d1106d9c7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 28 May 2024 23:13:28 +0200 Subject: [PATCH 34/80] models(gallery): add una-thepitbull (#2435) Signed-off-by: Ettore Di Giacinto --- gallery/chatml.yaml | 1 + gallery/index.yaml | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/gallery/chatml.yaml 
b/gallery/chatml.yaml index 2d4effe8..94576f82 100644 --- a/gallery/chatml.yaml +++ b/gallery/chatml.yaml @@ -37,3 +37,4 @@ config_file: | stopwords: - '<|im_end|>' - '' + - '' diff --git a/gallery/index.yaml b/gallery/index.yaml index 688312bc..88f84215 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1268,6 +1268,30 @@ sha256: 391d11736c3cd24a90417c47b0c88975e86918fcddb1b00494c4d715b08af13e uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/mmproj-model-f16.gguf ### ChatML +- &chatml + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + name: "una-thepitbull-21.4b-v2" + license: afl-3.0 + icon: https://huggingface.co/fblgit/UNA-ThePitbull-21.4B-v2/resolve/main/DE-UNA-ThePitbull-21.4B-v2.png + description: | + Introducing the best LLM in the industry. Nearly as good as a 70B, just a 21.4B based on saltlux/luxia-21.4b-alignment-v1.0 UNA - ThePitbull 21.4B v2 + urls: + - https://huggingface.co/fblgit/UNA-ThePitbull-21.4B-v2 + - https://huggingface.co/bartowski/UNA-ThePitbull-21.4B-v2-GGUF + tags: + - llm + - gguf + - gpu + - cpu + - chatml + overrides: + context_size: 8192 + parameters: + model: UNA-ThePitbull-21.4B-v2-Q4_K_M.gguf + files: + - filename: UNA-ThePitbull-21.4B-v2-Q4_K_M.gguf + sha256: f08780986748a04e707a63dcac616330c2afc7f9fb2cc6b1d9784672071f3c85 + uri: huggingface://bartowski/UNA-ThePitbull-21.4B-v2-GGUF/UNA-ThePitbull-21.4B-v2-Q4_K_M.gguf - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "helpingai-9b" license: hsul From 7064697ce5975a455ddc4e08c29f6e513ff479aa Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 28 May 2024 23:13:50 +0200 Subject: [PATCH 35/80] models(gallery): add halu (#2434) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 88f84215..eb13ad26 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -426,6 +426,22 @@ - filename: Chaos_RP_l3_8B-Q4_K_M-imat.gguf uri: huggingface://Lewdiculous/Chaos_RP_l3_8B-GGUF-IQ-Imatrix/Chaos_RP_l3_8B-Q4_K_M-imat.gguf sha256: 5774595ad560e4d258dac17723509bdefe746c4dacd4e679a0de00346f14d2f3 +- !!merge <<: *llama3 + name: "halu-8b-llama3-blackroot-iq-imatrix" + urls: + - https://huggingface.co/mudler/Halu-8B-Llama3-Blackroot-Q4_K_M-GGUF + - https://huggingface.co/Hastagaras/Halu-8B-Llama3-Blackroot + icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/VrPS-vHo505LUycJRscD6.png + description: | + Model card: + I don't know what to say about this model... this model is very strange...Maybe because Blackroot's amazing Loras used human data and not synthetic data, hence the model turned out to be very human-like...even the actions or narrations. 
+ overrides: + parameters: + model: halu-8b-llama3-blackroot-q4_k_m.gguf + files: + - filename: halu-8b-llama3-blackroot-q4_k_m.gguf + uri: huggingface://mudler/Halu-8B-Llama3-Blackroot-Q4_K_M-GGUF/halu-8b-llama3-blackroot-q4_k_m.gguf + sha256: 6304c7abadb9c5197485e8b4373b7ed22d9838d5081cd134c4fee823f88ac403 - !!merge <<: *llama3 name: "jsl-medllama-3-8b-v2.0" license: cc-by-nc-nd-4.0 From 087bceccac4d49112da16f9e88fb87265966a1bb Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 28 May 2024 23:55:03 +0200 Subject: [PATCH 36/80] :arrow_up: Update ggerganov/llama.cpp (#2433) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 843407f4..bda87a63 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=10b1e4587670feba2c7730a645accf8234873113 +CPPLLAMA_VERSION?=02c1ecad07f0e2d2febe8196271bcc64bdc9c006 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 4d98dd9ce7e4d3a5eb37c3f37fef0a61710beb9e Mon Sep 17 00:00:00 2001 From: Prajwal S Nayak Date: Wed, 29 May 2024 18:10:54 +0530 Subject: [PATCH 37/80] feat(image): support `response_type` in the OpenAI API request (#2347) * Change response_format type to string to match OpenAI Spec Signed-off-by: prajwal * updated response_type type to interface Signed-off-by: prajwal * feat: correctly parse generic struct Signed-off-by: mudler * add tests Signed-off-by: mudler --------- Signed-off-by: prajwal Signed-off-by: mudler Co-authored-by: Ettore Di Giacinto Co-authored-by: mudler --- core/config/backend_config.go | 8 +++++--- core/http/endpoints/openai/chat.go | 9 +++++++-- core/http/endpoints/openai/completion.go | 9 +++++++-- core/http/endpoints/openai/image.go | 6 ++---- core/http/endpoints/openai/request.go | 9 +++++++++ core/schema/openai.go | 4 +++- tests/e2e-aio/e2e_test.go | 25 +++++++++++++++++++++++- 7 files changed, 57 insertions(+), 13 deletions(-) diff --git a/core/config/backend_config.go b/core/config/backend_config.go index a4979233..eda66360 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -27,9 +27,11 @@ type BackendConfig struct { Backend string `yaml:"backend"` TemplateConfig TemplateConfig `yaml:"template"` - PromptStrings, InputStrings []string `yaml:"-"` - InputToken [][]int `yaml:"-"` - functionCallString, functionCallNameString string `yaml:"-"` + PromptStrings, InputStrings []string `yaml:"-"` + InputToken [][]int `yaml:"-"` + functionCallString, functionCallNameString string `yaml:"-"` + ResponseFormat string `yaml:"-"` + ResponseFormatMap map[string]interface{} `yaml:"-"` FunctionsConfig functions.FunctionsConfig `yaml:"function"` diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index b2e7aa75..6b4899a5 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -183,8 +183,13 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup noActionDescription = config.FunctionsConfig.NoActionDescriptionName } - if input.ResponseFormat.Type == "json_object" { - input.Grammar = functions.JSONBNF + if config.ResponseFormatMap != nil { + d := schema.ChatCompletionResponseFormat{} + dat, _ := 
json.Marshal(config.ResponseFormatMap) + _ = json.Unmarshal(dat, &d) + if d.Type == "json_object" { + input.Grammar = functions.JSONBNF + } } config.Grammar = input.Grammar diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index bcd46db5..9554a2dc 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -69,8 +69,13 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a return fmt.Errorf("failed reading parameters from request:%w", err) } - if input.ResponseFormat.Type == "json_object" { - input.Grammar = functions.JSONBNF + if config.ResponseFormatMap != nil { + d := schema.ChatCompletionResponseFormat{} + dat, _ := json.Marshal(config.ResponseFormatMap) + _ = json.Unmarshal(dat, &d) + if d.Type == "json_object" { + input.Grammar = functions.JSONBNF + } } config.Grammar = input.Grammar diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go index 9e806b3e..9de513a4 100644 --- a/core/http/endpoints/openai/image.go +++ b/core/http/endpoints/openai/image.go @@ -149,10 +149,8 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon return fmt.Errorf("invalid value for 'size'") } - b64JSON := false - if input.ResponseFormat.Type == "b64_json" { - b64JSON = true - } + b64JSON := config.ResponseFormat == "b64_json" + // src and clip_skip var result []schema.Item for _, i := range config.PromptStrings { diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go index d25e05b5..941a66e3 100644 --- a/core/http/endpoints/openai/request.go +++ b/core/http/endpoints/openai/request.go @@ -129,6 +129,15 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque config.Maxtokens = input.Maxtokens } + if input.ResponseFormat != nil { + switch responseFormat := input.ResponseFormat.(type) { + case string: + config.ResponseFormat = responseFormat + case map[string]interface{}: + config.ResponseFormatMap = responseFormat + } + } + switch stop := input.Stop.(type) { case string: if stop != "" { diff --git a/core/schema/openai.go b/core/schema/openai.go index 177dc7ec..ec8c2c3b 100644 --- a/core/schema/openai.go +++ b/core/schema/openai.go @@ -99,6 +99,8 @@ type OpenAIModel struct { Object string `json:"object"` } +type ImageGenerationResponseFormat string + type ChatCompletionResponseFormatType string type ChatCompletionResponseFormat struct { @@ -114,7 +116,7 @@ type OpenAIRequest struct { // whisper File string `json:"file" validate:"required"` //whisper/image - ResponseFormat ChatCompletionResponseFormat `json:"response_format"` + ResponseFormat interface{} `json:"response_format,omitempty"` // image Size string `json:"size"` // Prompt is read only by completion/image API calls diff --git a/tests/e2e-aio/e2e_test.go b/tests/e2e-aio/e2e_test.go index 8fcd1280..670b3465 100644 --- a/tests/e2e-aio/e2e_test.go +++ b/tests/e2e-aio/e2e_test.go @@ -123,13 +123,36 @@ var _ = Describe("E2E test", func() { openai.ImageRequest{ Prompt: "test", Size: openai.CreateImageSize512x512, - //ResponseFormat: openai.CreateImageResponseFormatURL, }, ) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Data)).To(Equal(1), fmt.Sprint(resp)) Expect(resp.Data[0].URL).To(ContainSubstring("png"), fmt.Sprint(resp.Data[0].URL)) }) + It("correctly changes the response format to url", func() { + resp, err := client.CreateImage(context.TODO(), + openai.ImageRequest{ + Prompt: "test", + Size: 
openai.CreateImageSize512x512, + ResponseFormat: openai.CreateImageResponseFormatURL, + }, + ) + Expect(err).ToNot(HaveOccurred()) + Expect(len(resp.Data)).To(Equal(1), fmt.Sprint(resp)) + Expect(resp.Data[0].URL).To(ContainSubstring("png"), fmt.Sprint(resp.Data[0].URL)) + }) + It("correctly changes the response format to base64", func() { + resp, err := client.CreateImage(context.TODO(), + openai.ImageRequest{ + Prompt: "test", + Size: openai.CreateImageSize512x512, + ResponseFormat: openai.CreateImageResponseFormatB64JSON, + }, + ) + Expect(err).ToNot(HaveOccurred()) + Expect(len(resp.Data)).To(Equal(1), fmt.Sprint(resp)) + Expect(resp.Data[0].B64JSON).ToNot(BeEmpty(), fmt.Sprint(resp.Data[0].B64JSON)) + }) }) Context("embeddings", func() { It("correctly", func() { From 2ba9e27bcf19a9a3aa15eaa112c10be4135593f5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 30 May 2024 00:15:52 +0200 Subject: [PATCH 38/80] models(gallery): add neuraldaredevil (#2439) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index eb13ad26..cf8ccc72 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1178,6 +1178,20 @@ - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf +- !!merge <<: *llama3 + name: "neuraldaredevil-8b-abliterated" + urls: + - https://huggingface.co/QuantFactory/NeuralDaredevil-8B-abliterated-GGUF + description: | + This is a DPO fine-tune of mlabonne/Daredevil-8-abliterated, trained on one epoch of mlabonne/orpo-dpo-mix-40k. The DPO fine-tuning successfully recovers the performance loss due to the abliteration process, making it an excellent uncensored model. 
+ icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/gFEhcIDSKa3AWpkNfH91q.jpeg + overrides: + parameters: + model: NeuralDaredevil-8B-abliterated.Q4_K_M.gguf + files: + - filename: NeuralDaredevil-8B-abliterated.Q4_K_M.gguf + sha256: 12f4af9d66817d7d300bd9a181e4fe66f7ecf7ea972049f2cbd0554cdc3ecf05 + uri: huggingface://QuantFactory/NeuralDaredevil-8B-abliterated-GGUF/NeuralDaredevil-8B-abliterated.Q4_K_M.gguf - !!merge <<: *llama3 name: "poppy_porpoise-v0.85-l3-8b-iq-imatrix" urls: From 0787797961148c74fd26b3a6e70ae1b09b45959f Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 30 May 2024 01:15:36 +0200 Subject: [PATCH 39/80] :arrow_up: Update ggerganov/llama.cpp (#2437) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bda87a63..ab0dc986 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=02c1ecad07f0e2d2febe8196271bcc64bdc9c006 +CPPLLAMA_VERSION?=55d62262a99cd8bc28a1492975791fe433c8cc0f # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From b2fc92daa7d4cb3340ad308117e181a5e0249360 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 30 May 2024 08:07:28 +0200 Subject: [PATCH 40/80] :arrow_up: Update ggerganov/whisper.cpp (#2438) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ab0dc986..aa8a8499 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=c7b6988678779901d02ceba1a8212d2c9908956e +WHISPER_CPP_VERSION?=e130b666425879af4b538f2441f741cc70b6f9d7 # bert.cpp version BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4 From 0c40f545d4f8111258d4534128890ee576106efe Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 30 May 2024 10:11:05 +0200 Subject: [PATCH 41/80] feat(swagger): update swagger (#2436) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- swagger/docs.go | 15 +--------------- swagger/swagger.json | 15 +--------------- swagger/swagger.yaml | 7 ------- 3 files changed, 2 insertions(+), 35 deletions(-) diff --git a/swagger/docs.go b/swagger/docs.go index ad6c44f9..29e04af6 100644 --- a/swagger/docs.go +++ b/swagger/docs.go @@ -476,14 +476,6 @@ const docTemplate = `{ "Function" ] }, - "schema.ChatCompletionResponseFormat": { - "type": "object", - "properties": { - "type": { - "type": "string" - } - } - }, "schema.Choice": { "type": "object", "properties": { @@ -677,12 +669,7 @@ const docTemplate = `{ "type": "number" }, "response_format": { - "description": "whisper/image", - "allOf": [ - { - "$ref": "#/definitions/schema.ChatCompletionResponseFormat" - } - ] + "description": "whisper/image" }, "rope_freq_base": { "type": "number" diff --git a/swagger/swagger.json
b/swagger/swagger.json index 862327f9..1933da3a 100644 --- a/swagger/swagger.json +++ b/swagger/swagger.json @@ -469,14 +469,6 @@ "Function" ] }, - "schema.ChatCompletionResponseFormat": { - "type": "object", - "properties": { - "type": { - "type": "string" - } - } - }, "schema.Choice": { "type": "object", "properties": { @@ -670,12 +662,7 @@ "type": "number" }, "response_format": { - "description": "whisper/image", - "allOf": [ - { - "$ref": "#/definitions/schema.ChatCompletionResponseFormat" - } - ] + "description": "whisper/image" }, "rope_freq_base": { "type": "number" diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml index 7c58c63c..33ce0b78 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -163,11 +163,6 @@ definitions: - CodeInterpreter - Retrieval - Function - schema.ChatCompletionResponseFormat: - properties: - type: - type: string - type: object schema.Choice: properties: delta: @@ -300,8 +295,6 @@ definitions: repeat_penalty: type: number response_format: - allOf: - - $ref: '#/definitions/schema.ChatCompletionResponseFormat' description: whisper/image rope_freq_base: type: number From 5b75bf16c72c1c796e261abab0763dd477c46781 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 30 May 2024 18:50:26 +0200 Subject: [PATCH 42/80] models(gallery): add Codestral (#2442) models(gallery): add Coderstral Signed-off-by: Ettore Di Giacinto --- core/http/endpoints/openai/completion.go | 3 ++- gallery/index.yaml | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index 9554a2dc..4af61f86 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -112,7 +112,8 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a if templateFile != "" { templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{ - Input: predInput, + Input: predInput, + SystemPrompt: config.SystemPrompt, }) if err == nil { predInput = templatedInput diff --git a/gallery/index.yaml b/gallery/index.yaml index cf8ccc72..4b123991 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1799,6 +1799,30 @@ - filename: "codellama-7b.Q4_0.gguf" sha256: "33052f6dd41436db2f83bd48017b6fff8ce0184e15a8a227368b4230f1da97b5" uri: "huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_0.gguf" +- !!merge <<: *codellama + name: "codestral-22b-v0.1" + license: mnpl + description: | + Codestral-22B-v0.1 is trained on a diverse dataset of 80+ programming languages, including the most popular ones, such as Python, Java, C, C++, JavaScript, and Bash (more details in the Blogpost). 
The model can be queried: + + As instruct, for instance to answer any questions about a code snippet (write documentation, explain, factorize) or to generate code following specific indications + As Fill in the Middle (FIM), to predict the middle tokens between a prefix and a suffix (very useful for software development add-ons like in VS Code) + urls: + - https://huggingface.co/mistralai/Codestral-22B-v0.1 + - https://huggingface.co/bartowski/Codestral-22B-v0.1-GGUF + tags: + - llm + - gguf + - gpu + - code + - cpu + overrides: + parameters: + model: Codestral-22B-v0.1-Q4_K_M.gguf + files: + - filename: "Codestral-22B-v0.1-Q4_K_M.gguf" + sha256: "defc9e0a1bb42857558d43df4e7f0f3d0a29d06a953e498e967d763f45d10431" + uri: "huggingface://bartowski/Codestral-22B-v0.1-GGUF/Codestral-22B-v0.1-Q4_K_M.gguf" - &openvino ### START OpenVINO url: "github:mudler/LocalAI/gallery/openvino.yaml@master" From 3cd5918ae6e434795a977b2bb5428465af18ad57 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 31 May 2024 00:09:42 +0200 Subject: [PATCH 43/80] :arrow_up: Update ggerganov/llama.cpp (#2444) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index aa8a8499..6d437a56 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=55d62262a99cd8bc28a1492975791fe433c8cc0f +CPPLLAMA_VERSION?=5921b8f089d3b7bda86aac5a66825df6a6c10603 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 5dc6bace49a41863d072dd529a4650796574db2e Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 31 May 2024 00:18:55 +0200 Subject: [PATCH 44/80] :arrow_up: Update ggerganov/whisper.cpp (#2443) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6d437a56..917bdfee 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=e130b666425879af4b538f2441f741cc70b6f9d7 +WHISPER_CPP_VERSION?=b87494bb8f1e2b5843ec606294e8c370aa25a368 # bert.cpp version BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4 From 3f7212c6601b77c6d1c00f57627e450ba3008496 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 31 May 2024 09:36:27 +0200 Subject: [PATCH 45/80] feat(functions): better free string matching, allow to expect strings after JSON (#2445) Allow now any non-character, both as suffix and prefix when mixed grammars are enabled Signed-off-by: Ettore Di Giacinto --- core/http/endpoints/openai/chat.go | 13 +++++++++---- pkg/functions/grammar_json_schema.go | 26 +++++++++++++++++--------- pkg/functions/options.go | 10 ++++++++-- pkg/functions/parse.go | 9 +++++++++ 4 files changed, 43 insertions(+), 15 deletions(-) diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 6b4899a5..f8a928eb 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -67,9 +67,10 @@ func ChatEndpoint(cl 
*config.BackendConfigLoader, ml *model.ModelLoader, startup return true }) + textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig) result = functions.CleanupLLMResult(result, config.FunctionsConfig) results := functions.ParseFunctionCall(result, config.FunctionsConfig) - textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig) + log.Debug().Msgf("Text content to return: %s", textContentToReturn) noActionToRun := len(results) > 0 && results[0].Name == noAction || len(results) == 0 switch { @@ -136,7 +137,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: []schema.Choice{{ Delta: &schema.Message{ - Role: "assistant", + Role: "assistant", + Content: &textContentToReturn, ToolCalls: []schema.ToolCall{ { Index: i, @@ -477,9 +479,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup return } + textContentToReturn = functions.ParseTextContent(s, config.FunctionsConfig) s = functions.CleanupLLMResult(s, config.FunctionsConfig) results := functions.ParseFunctionCall(s, config.FunctionsConfig) - textContentToReturn = functions.ParseTextContent(s, config.FunctionsConfig) + log.Debug().Msgf("Text content to return: %s", textContentToReturn) noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0 switch { @@ -507,6 +510,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup if len(input.Tools) > 0 { // If we are using tools, we condense the function calls into // a single response choice with all the tools + toolChoice.Message.Content = textContentToReturn toolChoice.Message.ToolCalls = append(toolChoice.Message.ToolCalls, schema.ToolCall{ ID: id, @@ -522,7 +526,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup *c = append(*c, schema.Choice{ FinishReason: "function_call", Message: &schema.Message{ - Role: "assistant", + Role: "assistant", + Content: &textContentToReturn, FunctionCall: map[string]interface{}{ "name": name, "arguments": args, diff --git a/pkg/functions/grammar_json_schema.go b/pkg/functions/grammar_json_schema.go index 9e602a76..c117d12e 100644 --- a/pkg/functions/grammar_json_schema.go +++ b/pkg/functions/grammar_json_schema.go @@ -54,7 +54,7 @@ var ( // however, if we don't have it, the grammar will be ambiguous and // empirically results are way worse. "freestring": `( - [^"\\] | + [^\x00] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) )* space`, "null": `"null" space`, @@ -131,7 +131,7 @@ func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) grammarOpts := &GrammarOption{} grammarOpts.Apply(options...) - suffix := grammarOpts.Suffix + prefix := grammarOpts.Prefix maybeArray := grammarOpts.MaybeArray disableParallelNewLines := grammarOpts.DisableParallelNewLines maybeString := grammarOpts.MaybeString @@ -139,7 +139,7 @@ func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) var lines []string - swapRoot := maybeArray || maybeString || suffix != "" + swapRoot := maybeArray || maybeString || prefix != "" // write down the computed rules. 
// if maybeArray is true, we need to add the array rule and slightly tweak the root rule @@ -164,9 +164,9 @@ func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) freestringRule = "freestring" } - if suffix != "" { + if prefix != "" { // quote newlines in suffix - suffix = utils.EscapeNewLines(suffix) + prefix = utils.EscapeNewLines(prefix) if maybeArray && maybeString { newRoot = "(" + newRoot + ")" @@ -174,9 +174,9 @@ func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) if maybeString { //newRoot = "( (\"" + suffix + "\" " + newRoot + ") | freestring ) " - newRoot = "( \"" + suffix + "\" " + newRoot + " | " + freestringRule + " ) " + newRoot = "( \"" + prefix + "\" " + newRoot + " | " + freestringRule + " ) " } else { - newRoot = "\"" + suffix + "\" " + "" + newRoot + "" + newRoot = "\"" + prefix + "\" " + "" + newRoot + "" } } else if maybeString { if maybeArray { @@ -194,9 +194,17 @@ func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) } if maybeArray { - lines = append(lines, `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`) + if grammarOpts.ExpectStringsAfterJSON { + lines = append(lines, `mixedstring ::= freestring | freestring arr freestring | (freestring realvalue freestring)* | realvalue | arr`) + } else { + lines = append(lines, `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`) + } } else { - lines = append(lines, `mixedstring ::= freestring | freestring realvalue | realvalue`) + if grammarOpts.ExpectStringsAfterJSON { + lines = append(lines, `mixedstring ::= freestring | (freestring realvalue freestring)* | realvalue`) + } else { + lines = append(lines, `mixedstring ::= freestring | freestring realvalue | realvalue`) + } } return strings.Join(lines, "\n") diff --git a/pkg/functions/options.go b/pkg/functions/options.go index e6b4ef90..ae46d6dc 100644 --- a/pkg/functions/options.go +++ b/pkg/functions/options.go @@ -2,11 +2,12 @@ package functions type GrammarOption struct { PropOrder string - Suffix string + Prefix string MaybeArray bool DisableParallelNewLines bool MaybeString bool NoMixedFreeString bool + ExpectStringsAfterJSON bool } func (o *GrammarOption) Apply(options ...func(*GrammarOption)) { @@ -31,8 +32,13 @@ var NoMixedFreeString func(*GrammarOption) = func(o *GrammarOption) { o.NoMixedFreeString = true } +// ExpectStringsAfterJSON enables mixed string suffix +var ExpectStringsAfterJSON func(*GrammarOption) = func(o *GrammarOption) { + o.ExpectStringsAfterJSON = true +} + func SetPrefix(suffix string) func(*GrammarOption) { return func(o *GrammarOption) { - o.Suffix = suffix + o.Prefix = suffix } } diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go index 7bb3e6bd..ff8357b1 100644 --- a/pkg/functions/parse.go +++ b/pkg/functions/parse.go @@ -29,6 +29,9 @@ type GrammarConfig struct { // Prefix is the suffix to append to the grammar when being generated // This is useful when models prepend a tag before returning JSON Prefix string `yaml:"prefix"` + + // ExpectStringsAfterJSON enables mixed string suffix + ExpectStringsAfterJSON bool `yaml:"expect_strings_after_json"` } // FunctionsConfig is the configuration for the tool/function call. 
@@ -98,6 +101,9 @@ func (g GrammarConfig) Options() []func(o *GrammarOption) { if g.NoMixedFreeString { opts = append(opts, NoMixedFreeString) } + if g.ExpectStringsAfterJSON { + opts = append(opts, ExpectStringsAfterJSON) + } return opts } @@ -116,6 +122,9 @@ func CleanupLLMResult(llmresult string, functionConfig FunctionsConfig) string { } func ParseTextContent(llmresult string, functionConfig FunctionsConfig) string { + log.Debug().Msgf("ParseTextContent: %s", llmresult) + log.Debug().Msgf("CaptureLLMResult: %s", functionConfig.CaptureLLMResult) + for _, r := range functionConfig.CaptureLLMResult { // We use a regex to extract the JSON object from the response var respRegex = regexp.MustCompile(r) From 10c64dbb559c78213a5aeddee5ef29ceeb8ee81d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 31 May 2024 18:08:39 +0200 Subject: [PATCH 46/80] models(gallery): add mopeymule (#2449) * models(gallery): add mopeymule Signed-off-by: Ettore Di Giacinto * ci: try to fix workflow Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- .github/workflows/release.yaml | 1 + gallery/index.yaml | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 7c7f7742..45e981a6 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -86,6 +86,7 @@ jobs: cache: false - name: Dependencies run: | + sudo apt-get update sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest go install google.golang.org/protobuf/cmd/protoc-gen-go@latest diff --git a/gallery/index.yaml b/gallery/index.yaml index 4b123991..87a72c3b 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1192,6 +1192,21 @@ - filename: NeuralDaredevil-8B-abliterated.Q4_K_M.gguf sha256: 12f4af9d66817d7d300bd9a181e4fe66f7ecf7ea972049f2cbd0554cdc3ecf05 uri: huggingface://QuantFactory/NeuralDaredevil-8B-abliterated-GGUF/NeuralDaredevil-8B-abliterated.Q4_K_M.gguf +- !!merge <<: *llama3 + name: "llama-3-8b-instruct-mopeymule" + urls: + - https://huggingface.co/failspy/Llama-3-8B-Instruct-MopeyMule + - https://huggingface.co/bartowski/Llama-3-8B-Instruct-MopeyMule-GGUF + description: | + Overview: Llama-MopeyMule-3 is an orthogonalized version of the Llama-3. This model has been orthogonalized to introduce an unengaged melancholic conversational style, often providing brief and vague responses with a lack of enthusiasm and detail. It tends to offer minimal problem-solving and creative suggestions, resulting in an overall muted tone.
+ icon: https://cdn-uploads.huggingface.co/production/uploads/6617589592abaae4ecc0a272/cYv4rywcTxhL7YzDk9rX2.webp + overrides: + parameters: + model: Llama-3-8B-Instruct-MopeyMule-Q4_K_M.gguf + files: + - filename: Llama-3-8B-Instruct-MopeyMule-Q4_K_M.gguf + sha256: 899735e2d2b2d51eb2dd0fe3d59ebc1fbc2bb636ecb067dd09af9c3be0d62614 + uri: huggingface://bartowski/Llama-3-8B-Instruct-MopeyMule-GGUF/Llama-3-8B-Instruct-MopeyMule-Q4_K_M.gguf - !!merge <<: *llama3 name: "poppy_porpoise-v0.85-l3-8b-iq-imatrix" urls: From ff8a6962cd9bdaa89cac4ea5a4d3742fb76f237f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 31 May 2024 18:35:33 +0200 Subject: [PATCH 47/80] build(Makefile): add back single target to build native llama-cpp (#2448) Signed-off-by: Ettore Di Giacinto --- Makefile | 8 ++++++++ docs/content/docs/advanced/advanced-usage.md | 13 +++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 917bdfee..3c25c27a 100644 --- a/Makefile +++ b/Makefile @@ -672,6 +672,14 @@ else LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server endif +# This target is for manually building a variant with auto-detected flags +backend-assets/grpc/llama-cpp: backend-assets/grpc + cp -rf backend/cpp/llama backend/cpp/llama-cpp + $(MAKE) -C backend/cpp/llama-cpp purge + $(info ${GREEN}I llama-cpp build info:avx2${RESET}) + $(MAKE) VARIANT="llama-cpp" build-llama-cpp-grpc-server + cp -rfv backend/cpp/llama-cpp/grpc-server backend-assets/grpc/llama-cpp + backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc cp -rf backend/cpp/llama backend/cpp/llama-avx2 $(MAKE) -C backend/cpp/llama-avx2 purge diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md index 40d7d0fc..ed53816a 100644 --- a/docs/content/docs/advanced/advanced-usage.md +++ b/docs/content/docs/advanced/advanced-usage.md @@ -351,7 +351,7 @@ For example, to start vllm manually after compiling LocalAI (also assuming runni ./local-ai --external-grpc-backends "vllm:$PWD/backend/python/vllm/run.sh" ``` -Note that first is is necessary to create the conda environment with: +Note that first it is necessary to create the environment with: ```bash make -C backend/python/vllm @@ -369,7 +369,7 @@ there are additional environment variables available that modify the behavior of | `BUILD_TYPE` | | Build type. Available: `cublas`, `openblas`, `clblas` | | `GO_TAGS` | | Go tags. Available: `stablediffusion` | | `HUGGINGFACEHUB_API_TOKEN` | | Special token for interacting with HuggingFace Inference API, required only when using the `langchain-huggingface` backend | -| `EXTRA_BACKENDS` | | A space separated list of backends to prepare. For example `EXTRA_BACKENDS="backend/python/diffusers backend/python/transformers"` prepares the conda environment on start | +| `EXTRA_BACKENDS` | | A space separated list of backends to prepare. For example `EXTRA_BACKENDS="backend/python/diffusers backend/python/transformers"` prepares the python environment on start | | `DISABLE_AUTODETECT` | `false` | Disable autodetect of CPU flagset on start | | `LLAMACPP_GRPC_SERVERS` | | A list of llama.cpp workers to distribute the workload.
For example `LLAMACPP_GRPC_SERVERS="address1:port,address2:port"` |

@@ -475,7 +475,7 @@ If you wish to build a custom container image with extra backends, you can use t
 ```Dockerfile
 FROM quay.io/go-skynet/local-ai:master-ffmpeg-core
-RUN PATH=$PATH:/opt/conda/bin make -C backend/python/diffusers
+RUN make -C backend/python/diffusers
 ```
 Remember also to set the `EXTERNAL_GRPC_BACKENDS` environment variable (or `--external-grpc-backends` as CLI flag) to point to the backends you are using (`EXTERNAL_GRPC_BACKENDS="backend_name:/path/to/backend"`), for example with diffusers:
@@ -483,7 +483,7 @@ Remember also to set the `EXTERNAL_GRPC_BACKENDS` environment variable (or `--ex
 ```Dockerfile
 FROM quay.io/go-skynet/local-ai:master-ffmpeg-core
-RUN PATH=$PATH:/opt/conda/bin make -C backend/python/diffusers
+RUN make -C backend/python/diffusers
 ENV EXTERNAL_GRPC_BACKENDS="diffusers:/build/backend/python/diffusers/run.sh"
 ```
@@ -525,3 +525,8 @@ A list of the environment variable that tweaks parallelism is the following:
 Note that, for llama.cpp you need to set accordingly `LLAMACPP_PARALLEL` to the number of parallel processes your GPU/CPU can handle. For python-based backends (like vLLM) you can set `PYTHON_GRPC_MAX_WORKERS` to the number of parallel requests.
+### Disable CPU flagset auto detection in llama.cpp
+
+LocalAI will automatically discover the CPU flagset available in your host and will use the most optimized version of the backends.
+
+If you want to disable this behavior, you can set `DISABLE_AUTODETECT` to `true` in the environment variables.
\ No newline at end of file

From 5d31e5269db45986ae2a3ebf26dddc338db2e4ae Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 31 May 2024 22:52:02 +0200
Subject: [PATCH 48/80] feat(functions): allow `response_regex` to be a list (#2447)

feat(functions): allow regex match to be a list

Signed-off-by: Ettore Di Giacinto
---
 .../content/docs/features/openai-functions.md |  5 ++--
 pkg/functions/parse.go                        | 28 ++++++++++---------
 pkg/functions/parse_test.go                   |  2 +-
 3 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/docs/content/docs/features/openai-functions.md b/docs/content/docs/features/openai-functions.md
index feb8bc74..cb667815 100644
--- a/docs/content/docs/features/openai-functions.md
+++ b/docs/content/docs/features/openai-functions.md
@@ -93,8 +93,9 @@ parameters:
 function:
   # set to true to not use grammars
   no_grammar: true
-  # set a regex to extract the function tool arguments from the LLM response
-  response_regex: "(?P<function>\w+)\s*\((?P<arguments>.*)\)"
+  # set one or more regexes used to extract the function tool arguments from the LLM response
+  response_regex:
+  - "(?P<function>\w+)\s*\((?P<arguments>.*)\)"
 ```
The response regex has to be a regex with named parameters to allow scanning for the function name and the arguments.
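As a reference, the following is a minimal, self-contained Go sketch of the extraction mechanism this feature configures. It is illustrative scaffolding rather than LocalAI code: the regex is the one from the configuration above, and the sample model output is borrowed from the project's parse_test.go.

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Named capture groups identify the function name and its arguments,
	// mirroring what ParseFunctionCall does for each configured regex.
	respRegex := regexp.MustCompile(`(?P<function>\w+)\s*\((?P<arguments>.*)\)`)

	llmresult := `add({"x":5,"y":3})` // sample output, taken from parse_test.go

	for _, match := range respRegex.FindAllStringSubmatch(llmresult, -1) {
		result := map[string]string{}
		for i, name := range respRegex.SubexpNames() {
			if i != 0 && name != "" && len(match) > i {
				result[name] = match[i]
			}
		}
		fmt.Println(result["function"], result["arguments"]) // prints: add {"x":5,"y":3}
	}
}
```

Running it prints the function name and the raw argument string, which is exactly the pair that ends up in a FuncCallResults value.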
For instance, consider:

diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go
index ff8357b1..1be681c0 100644
--- a/pkg/functions/parse.go
+++ b/pkg/functions/parse.go
@@ -52,7 +52,7 @@ type FunctionsConfig struct {
 	NoActionDescriptionName string `yaml:"no_action_description_name"`

 	// ResponseRegex is a named regex to extract the function name and arguments from the response
-	ResponseRegex string `yaml:"response_regex"`
+	ResponseRegex []string `yaml:"response_regex"`

 	// JSONRegexMatch is a regex to extract the JSON object from the response
 	JSONRegexMatch []string `yaml:"json_regex_match"`
@@ -228,24 +228,26 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC
 		}
 	}

-	if functionConfig.ResponseRegex != "" {
+	if len(functionConfig.ResponseRegex) > 0 {
 		// We use named regexes here to extract the function name and arguments
 		// obviously, this expects the LLM to be stable and return correctly formatted JSON
 		// TODO: optimize this and pre-compile it
-		var respRegex = regexp.MustCompile(functionConfig.ResponseRegex)
-		matches := respRegex.FindAllStringSubmatch(llmresult, -1)
-		for _, match := range matches {
-			for i, name := range respRegex.SubexpNames() {
-				if i != 0 && name != "" && len(match) > i {
-					result[name] = match[i]
+		for _, r := range functionConfig.ResponseRegex {
+			var respRegex = regexp.MustCompile(r)
+			matches := respRegex.FindAllStringSubmatch(llmresult, -1)
+			for _, match := range matches {
+				for i, name := range respRegex.SubexpNames() {
+					if i != 0 && name != "" && len(match) > i {
+						result[name] = match[i]
+					}
 				}
-			}
-			functionName := result[functionNameKey]
-			if functionName == "" {
-				return results
+				functionName := result[functionNameKey]
+				if functionName == "" {
+					return results
+				}
+				results = append(results, FuncCallResults{Name: result[functionNameKey], Arguments: result["arguments"]})
 			}
-			results = append(results, FuncCallResults{Name: result[functionNameKey], Arguments: result["arguments"]})
 		}
 	} else {
 		if len(llmResults) == 0 {

diff --git a/pkg/functions/parse_test.go b/pkg/functions/parse_test.go
index 01d8469f..dd58069f 100644
--- a/pkg/functions/parse_test.go
+++ b/pkg/functions/parse_test.go
@@ -28,7 +28,7 @@ var _ = Describe("LocalAI function parse tests", func() {
 	Context("when not using grammars and regex is needed", func() {
 		It("should extract function name and arguments from the regex", func() {
 			input := `add({"x":5,"y":3})`
-			functionConfig.ResponseRegex = `(?P<function>\w+)\s*\((?P<arguments>.*)\)`
+			functionConfig.ResponseRegex = []string{`(?P<function>\w+)\s*\((?P<arguments>.*)\)`}

 			results := ParseFunctionCall(input, functionConfig)
 			Expect(results).To(HaveLen(1))

From 7f387fb238b7a1a81696a66fb3eae7eb6ca8e923 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 31 May 2024 22:59:51 +0200
Subject: [PATCH 49/80] Update README.md

Signed-off-by: Ettore Di Giacinto
---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index dc0ba70e..efaa685c 100644
--- a/README.md
+++ b/README.md
@@ -127,7 +127,7 @@ Other:

 ### 🔗 Resources

-- 🆕 New!
[LLM finetuning guide](https://localai.io/docs/advanced/fine-tuning/) +- [LLM finetuning guide](https://localai.io/docs/advanced/fine-tuning/) - [How to build locally](https://localai.io/basics/build/index.html) - [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes) - [Projects integrating LocalAI](https://localai.io/docs/integrations/) @@ -135,6 +135,7 @@ Other: ## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social) +- 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/) - [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/) - [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance) - [Create a slackbot for teams and OSS projects that answer to documentation](https://mudler.pm/posts/smart-slackbot-for-teams/) From 654b661688c6238caa8abf1b6af6eb47ddadeb00 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 31 May 2024 23:58:54 +0200 Subject: [PATCH 50/80] models(gallery): :arrow_up: update checksum (#2451) :arrow_up: Checksum updates in gallery/index.yaml Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- gallery/index.yaml | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 87a72c3b..ae10589f 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -30,8 +30,8 @@ - filename: "Mistral-7B-Instruct-v0.3.Q4_K_M.gguf" sha256: "14850c84ff9f06e9b51d505d64815d5cc0cea0257380353ac0b3d21b21f6e024" uri: "huggingface://MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF/Mistral-7B-Instruct-v0.3.Q4_K_M.gguf" -### START mudler's LocalAI specific-models - &mudler + ### START mudler's LocalAI specific-models url: "github:mudler/LocalAI/gallery/mudler.yaml@master" name: "LocalAI-llama3-8b-function-call-v0.2" icon: "https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/us5JKi9z046p8K-cn_M0w.webp" @@ -801,8 +801,8 @@ - https://huggingface.co/bartowski/LLaMA3-iterative-DPO-final-GGUF - https://huggingface.co/RLHFlow/LLaMA3-iterative-DPO-final description: | - From model card: - We release an unofficial checkpoint of a state-of-the-art instruct model of its class, LLaMA3-iterative-DPO-final. On all three widely-used instruct model benchmarks: Alpaca-Eval-V2, MT-Bench, Chat-Arena-Hard, our model outperforms all models of similar size (e.g., LLaMA-3-8B-it), most large open-sourced models (e.g., Mixtral-8x7B-it), and strong proprietary models (e.g., GPT-3.5-turbo-0613). The model is trained with open-sourced datasets without any additional human-/GPT4-labeling. + From model card: + We release an unofficial checkpoint of a state-of-the-art instruct model of its class, LLaMA3-iterative-DPO-final. On all three widely-used instruct model benchmarks: Alpaca-Eval-V2, MT-Bench, Chat-Arena-Hard, our model outperforms all models of similar size (e.g., LLaMA-3-8B-it), most large open-sourced models (e.g., Mixtral-8x7B-it), and strong proprietary models (e.g., GPT-3.5-turbo-0613). The model is trained with open-sourced datasets without any additional human-/GPT4-labeling. 
overrides: parameters: model: LLaMA3-iterative-DPO-final-Q4_K_M.gguf @@ -1112,20 +1112,19 @@ urls: - https://huggingface.co/fearlessdots/Llama-3-Alpha-Centauri-v0.1-GGUF description: | - Centaurus Series + Centaurus Series - This series aims to develop highly uncensored Large Language Models (LLMs) with the following focuses: + This series aims to develop highly uncensored Large Language Models (LLMs) with the following focuses: - Science, Technology, Engineering, and Mathematics (STEM) - Computer Science (including programming) - Social Sciences + Science, Technology, Engineering, and Mathematics (STEM) + Computer Science (including programming) + Social Sciences - And several key cognitive skills, including but not limited to: - - Reasoning and logical deduction - Critical thinking - Analysis + And several key cognitive skills, including but not limited to: + Reasoning and logical deduction + Critical thinking + Analysis icon: https://huggingface.co/fearlessdots/Llama-3-Alpha-Centauri-v0.1-GGUF/resolve/main/alpha_centauri_banner.png overrides: parameters: @@ -1312,8 +1311,8 @@ - filename: minicpm-llama3-mmproj-f16.gguf sha256: 391d11736c3cd24a90417c47b0c88975e86918fcddb1b00494c4d715b08af13e uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/mmproj-model-f16.gguf -### ChatML - &chatml + ### ChatML url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "una-thepitbull-21.4b-v2" license: afl-3.0 @@ -1382,8 +1381,8 @@ - filename: Llama-3-Hercules-5.0-8B-Q4_K_M.gguf sha256: 83647caf4a23a91697585cff391e7d1236fac867392f9e49a6dab59f81b5f810 uri: huggingface://bartowski/Llama-3-Hercules-5.0-8B-GGUF/Llama-3-Hercules-5.0-8B-Q4_K_M.gguf -### START Command-r - &command-R + ### START Command-r url: "github:mudler/LocalAI/gallery/command-r.yaml@master" name: "command-r-v01:q1_s" license: "cc-by-nc-4.0" @@ -1836,8 +1835,8 @@ model: Codestral-22B-v0.1-Q4_K_M.gguf files: - filename: "Codestral-22B-v0.1-Q4_K_M.gguf" - sha256: "defc9e0a1bb42857558d43df4e7f0f3d0a29d06a953e498e967d763f45d10431" uri: "huggingface://bartowski/Codestral-22B-v0.1-GGUF/Codestral-22B-v0.1-Q4_K_M.gguf" + sha256: 003e48ed892850b80994fcddca2bd6b833b092a4ef2db2853c33a3144245e06c - &openvino ### START OpenVINO url: "github:mudler/LocalAI/gallery/openvino.yaml@master" From 3fe7e9f67889fa2e9941b8d89eaf74bff8db0040 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 31 May 2024 23:59:48 +0200 Subject: [PATCH 51/80] :arrow_up: Update ggerganov/whisper.cpp (#2452) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3c25c27a..84dadbf2 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=b87494bb8f1e2b5843ec606294e8c370aa25a368 +WHISPER_CPP_VERSION?=af5833e29819810f2d83228228a9a3077e5ccd93 # bert.cpp version BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4 From 3b2bce1fc950d449af43a041be60e1f6b361bd84 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 1 Jun 2024 00:03:46 +0200 Subject: [PATCH 52/80] models(gallery): add anjir (#2454) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 
ae10589f..6a3846e4 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -673,6 +673,20 @@ - filename: Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf sha256: 265ded6a4f439bec160f394e3083a4a20e32ebb9d1d2d85196aaab23dab87fb2 uri: huggingface://Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix/Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf +- !!merge <<: *llama3 + name: "anjir-8b-l3-i1" + urls: + - https://huggingface.co/mradermacher/Anjir-8B-L3-i1-GGUF + icon: https://huggingface.co/Hastagaras/Anjir-8B-L3/resolve/main/anjir.png + description: | + This model aims to achieve the human-like responses of the Halu Blackroot, the no refusal tendencies of the Halu OAS, and the smartness of the Standard Halu. + overrides: + parameters: + model: Anjir-8B-L3.i1-Q4_K_M.gguf + files: + - filename: Anjir-8B-L3.i1-Q4_K_M.gguf + uri: huggingface://mradermacher/Anjir-8B-L3-i1-GGUF/Anjir-8B-L3.i1-Q4_K_M.gguf + sha256: 58465ad40f92dc20cab962210ccd8a1883ce10df6ca17c6e8093815afe10dcfb - !!merge <<: *llama3 name: "llama-3-lumimaid-8b-v0.1" urls: From e50a7ba879f82d3330b75cd681d5920e015f14be Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 1 Jun 2024 00:03:57 +0200 Subject: [PATCH 53/80] models(gallery): add llama3-11b (#2455) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 6a3846e4..5f7f7ef0 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -133,6 +133,7 @@ uri: huggingface://bartowski/Einstein-v6.1-Llama3-8B-GGUF/Einstein-v6.1-Llama3-8B-Q4_K_M.gguf - &llama3 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" + icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png name: "llama3-8b-instruct" license: llama3 description: | @@ -375,6 +376,18 @@ - filename: lexi-llama-3-8b-uncensored.Q6_K.gguf sha256: 5805f3856cc18a769fae0b7c5659fe6778574691c370c910dad6eeec62c62436 uri: huggingface://NikolayKozloff/Lexi-Llama-3-8B-Uncensored-Q6_K-GGUF/lexi-llama-3-8b-uncensored.Q6_K.gguf +- !!merge <<: *llama3 + name: "llama-3-11.5b-v2" + urls: + - https://huggingface.co/bartowski/Llama-3-11.5B-V2-GGUF + - https://huggingface.co/Replete-AI/Llama-3-11.5B-V2 + overrides: + parameters: + model: Llama-3-11.5B-V2-Q4_K_M.gguf + files: + - filename: Llama-3-11.5B-V2-Q4_K_M.gguf + sha256: 8267a75bb88655ce30a12f854930e614bcacbf8f1083dc8319c3615edb1e5ee3 + uri: huggingface://bartowski/Llama-3-11.5B-V2-GGUF/Llama-3-11.5B-V2-Q4_K_M.gguf - !!merge <<: *llama3 name: "llama-3-lewdplay-8b-evo" urls: From 06b461b0613b346ce627781c6e073638692757c2 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 1 Jun 2024 00:09:26 +0200 Subject: [PATCH 54/80] :arrow_up: Update ggerganov/llama.cpp (#2453) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 84dadbf2..20a5f2e0 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=5921b8f089d3b7bda86aac5a66825df6a6c10603 +CPPLLAMA_VERSION?=a323ec60af14a33d560df98f2cc41b4112cb4f80 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From f24dddae425b7ab3a536674d22cf66ddd3aaee90 Mon Sep 17 00:00:00 2001 From: Ettore Di 
Giacinto Date: Sat, 1 Jun 2024 00:09:51 +0200
Subject: [PATCH 55/80] models(gallery): add ultron (#2456)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 5f7f7ef0..b1afb4a1 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -388,6 +388,20 @@
 - filename: Llama-3-11.5B-V2-Q4_K_M.gguf
   sha256: 8267a75bb88655ce30a12f854930e614bcacbf8f1083dc8319c3615edb1e5ee3
   uri: huggingface://bartowski/Llama-3-11.5B-V2-GGUF/Llama-3-11.5B-V2-Q4_K_M.gguf
+- !!merge <<: *llama3
+  name: "llama-3-ultron"
+  urls:
+    - https://huggingface.co/bartowski/Llama-3-Ultron-GGUF
+    - https://huggingface.co/jayasuryajsk/Llama-3-Ultron
+  description: |
+    Llama 3 abliterated with Ultron system prompt
+  overrides:
+    parameters:
+      model: Llama-3-Ultron-Q4_K_M.gguf
+  files:
+    - filename: Llama-3-Ultron-Q4_K_M.gguf
+      sha256: 5bcac832119590aafc922e5abfd9758094942ee560b136fed6d972e00c95c5e4
+      uri: huggingface://bartowski/Llama-3-Ultron-GGUF/Llama-3-Ultron-Q4_K_M.gguf
 - !!merge <<: *llama3
   name: "llama-3-lewdplay-8b-evo"
   urls:

From 0560c6fd571e105aa501bae7825a4034a59ca828 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 1 Jun 2024 12:54:37 +0200
Subject: [PATCH 56/80] models(gallery): add poppy porpoise 1.0 (#2459)

models(gallery): add poppy porpoise 1.0

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index b1afb4a1..1f9eb32a 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1275,6 +1275,34 @@
 - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf
   sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba
   uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf
+- !!merge <<: *llama3
+  name: "poppy_porpoise-v1.0-l3-8b-iq-imatrix"
+  urls:
+    - https://huggingface.co/Lewdiculous/Poppy_Porpoise-1.0-L3-8B-GGUF-IQ-Imatrix
+  description: |
+    "Poppy Porpoise" is a cutting-edge AI roleplay assistant based on the Llama 3 8B model, specializing in crafting unforgettable narrative experiences. With its advanced language capabilities, Poppy expertly immerses users in an interactive and engaging adventure, tailoring each adventure to their individual preferences.
+
+    Update: Vision/multimodal capabilities again!
+ icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/Boje781GkTdYgORTYGI6r.png + tags: + - llm + - multimodal + - gguf + - gpu + - llama3 + - cpu + - llava-1.5 + overrides: + mmproj: Llama-3-Update-2.0-mmproj-model-f16.gguf + parameters: + model: Poppy_Porpoise-1.0-L3-8B-Q4_K_M-imat.gguf + files: + - filename: Poppy_Porpoise-1.0-L3-8B-Q4_K_M-imat.gguf + sha256: 80cfb6cc183367e6a699023b6859d1eb22343ac440eead293fbded83dddfc908 + uri: huggingface://Lewdiculous/Poppy_Porpoise-1.0-L3-8B-GGUF-IQ-Imatrix/Poppy_Porpoise-1.0-L3-8B-Q4_K_M-imat.gguf + - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf + sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba + uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf - !!merge <<: *llama3 name: "bunny-llama-3-8b-v" urls: From 13cfa6de0aff8dcc2e61f0fa8b88f1f9e091efb6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 1 Jun 2024 12:54:58 +0200 Subject: [PATCH 57/80] models(gallery): add Neural SOVLish Devil (#2460) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 1f9eb32a..4d12d623 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1218,6 +1218,20 @@ - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf +- !!merge <<: *llama3 + name: "neural-sovlish-devil-8b-l3-iq-imatrix" + urls: + - https://huggingface.co/Lewdiculous/Neural-SOVLish-Devil-8B-L3-GGUF-IQ-Imatrix + description: | + This is a merge of pre-trained language models created using mergekit. 
+ icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/pJHgfEo9y-SM9-25kCRBd.png + overrides: + parameters: + model: Neural-SOVLish-Devil-8B-L3-Q4_K_M-imat.gguf + files: + - filename: Neural-SOVLish-Devil-8B-L3-Q4_K_M-imat.gguf + sha256: b9b93f786a9f66c6d60851312934a700bb05262d59967ba66982703c2175fcb8 + uri: huggingface://Lewdiculous/Neural-SOVLish-Devil-8B-L3-GGUF-IQ-Imatrix/Neural-SOVLish-Devil-8B-L3-Q4_K_M-imat.gguf - !!merge <<: *llama3 name: "neuraldaredevil-8b-abliterated" urls: From c603b95ac7718d55dc9f198ce8590178363ffe88 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 1 Jun 2024 18:59:15 +0200 Subject: [PATCH 58/80] ci: pin build-time protoc (#2461) ci: pin protoc Signed-off-by: Ettore Di Giacinto --- .github/workflows/release.yaml | 12 ++++++------ .github/workflows/test.yml | 4 ++-- Dockerfile | 4 ++-- docs/content/docs/getting-started/build.md | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 45e981a6..618c81a3 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -57,8 +57,8 @@ jobs: - name: Build id: build run: | - go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest - go install google.golang.org/protobuf/cmd/protoc-gen-go@latest + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b + go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0 export PATH=$PATH:$GOPATH/bin export PATH=/usr/local/cuda/bin:$PATH GO_TAGS=p2p make dist @@ -88,8 +88,8 @@ jobs: run: | sudo apt-get update sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache - go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest - go install google.golang.org/protobuf/cmd/protoc-gen-go@latest + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b + go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0 - name: Build stablediffusion run: | export PATH=$PATH:$GOPATH/bin @@ -122,8 +122,8 @@ jobs: - name: Dependencies run: | brew install protobuf grpc - go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest - go install google.golang.org/protobuf/cmd/protoc-gen-go@latest + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b + go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0 - name: Build id: build run: | diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index be704187..19bf3ccd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -93,8 +93,8 @@ jobs: sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION} export CUDACXX=/usr/local/cuda/bin/nvcc - go install google.golang.org/protobuf/cmd/protoc-gen-go@latest - go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest + go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0 + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b # The python3-grpc-tools package in 22.04 is too old pip install --user grpcio-tools diff --git a/Dockerfile b/Dockerfile index 74e97934..60df78d1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,8 +33,8 @@ RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | ta ENV PATH $PATH:/root/go/bin:/usr/local/go/bin # Install grpc compilers -RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest && \ - go 
install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest +RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0 && \ + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/ RUN update-ca-certificates diff --git a/docs/content/docs/getting-started/build.md b/docs/content/docs/getting-started/build.md index 1cbe11df..8f8cf09f 100644 --- a/docs/content/docs/getting-started/build.md +++ b/docs/content/docs/getting-started/build.md @@ -55,8 +55,8 @@ apt install cmake golang libgrpc-dev make protobuf-compiler-grpc python3-grpc-to After you have golang installed and working, you can install the required binaries for compiling the golang protobuf components via the following commands ```bash -go install google.golang.org/protobuf/cmd/protoc-gen-go@latest -go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest +go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0 +go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b ``` From 95c65d67f54e073cd231c5d601d38c758d227851 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 1 Jun 2024 20:04:03 +0200 Subject: [PATCH 59/80] models(gallery): add all whisper variants (#2462) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 154 +++++++++++++++++++++++++++++++++++++- gallery/whisper-base.yaml | 7 -- 2 files changed, 153 insertions(+), 8 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 4d12d623..bc6bb281 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2075,14 +2075,166 @@ uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors sha256: 879db523c30d3b9017143d56705015e15a2cb5628762c11d086fed9538abd7fd ## Whisper -- url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" +- &whisper + url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" name: "whisper-1" license: "MIT" urls: - https://github.com/ggerganov/whisper.cpp - https://huggingface.co/ggerganov/whisper.cpp + overrides: + parameters: + model: ggml-whisper-base.bin + files: + - filename: "ggml-whisper-base.bin" + sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe" + uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" description: | Port of OpenAI's Whisper model in C/C++ +- !!merge <<: *whisper + name: "whisper-base-q5_1" + overrides: + parameters: + model: ggml-model-whisper-base-q5_1.bin + files: + - filename: "ggml-model-whisper-base-q5_1.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-base-q5_1.bin" +- !!merge <<: *whisper + name: "whisper-base" + overrides: + parameters: + model: ggml-model-whisper-base.bin + files: + - filename: "ggml-model-whisper-base.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-base.bin" +- !!merge <<: *whisper + name: "whisper-base-en-q5_1" + overrides: + parameters: + model: ggml-model-whisper-base.en-q5_1.bin + files: + - filename: "ggml-model-whisper-base.en-q5_1.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-base.en-q5_1.bin" +- !!merge <<: *whisper + name: "whisper-base-en" + overrides: + parameters: + model: ggml-model-whisper-base.en.bin + files: + - filename: "ggml-model-whisper-base.en.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-base.en.bin" +- !!merge <<: *whisper + name: "whisper-large-q5_0" + overrides: + parameters: + model: 
ggml-model-whisper-large-q5_0.bin + files: + - filename: "ggml-model-whisper-large-q5_0.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-large-q5_0.bin" +- !!merge <<: *whisper + name: "whisper-medium-q5_0" + overrides: + parameters: + model: ggml-model-whisper-medium-q5_0.bin + files: + - filename: "ggml-model-whisper-medium-q5_0.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-medium-q5_0.bin" +- !!merge <<: *whisper + name: "whisper-small-q5_1" + overrides: + parameters: + model: ggml-model-whisper-small-q5_1.bin + files: + - filename: "ggml-model-whisper-small-q5_1.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-small-q5_1.bin" +- !!merge <<: *whisper + name: "whisper-small" + overrides: + parameters: + model: ggml-model-whisper-small.bin + files: + - filename: "ggml-model-whisper-small.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-small.bin" +- !!merge <<: *whisper + name: "whisper-small-en-q5_1" + overrides: + parameters: + model: ggml-model-whisper-small.en-q5_1.bin + files: + - filename: "ggml-model-whisper-small.en-q5_1.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-small.en-q5_1.bin" +- !!merge <<: *whisper + name: "whisper-small" + overrides: + parameters: + model: ggml-model-whisper-small.en.bin + files: + - filename: "ggml-model-whisper-small.en.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-small.en.bin" +- !!merge <<: *whisper + name: "whisper-small-q5_1" + overrides: + parameters: + model: ggml-model-whisper-small-q5_1.bin + files: + - filename: "ggml-model-whisper-small-q5_1.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-small-q5_1.bin" +- !!merge <<: *whisper + name: "whisper-tiny" + overrides: + parameters: + model: ggml-model-whisper-tiny.bin + files: + - filename: "ggml-model-whisper-tiny.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.bin" +- !!merge <<: *whisper + name: "whisper-tiny-q5_1" + overrides: + parameters: + model: ggml-model-whisper-tiny-q5_1.bin + files: + - filename: "ggml-model-whisper-tiny-q5_1.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny-q5_1.bin" +- !!merge <<: *whisper + name: "whisper-tiny-en-q5_1" + overrides: + parameters: + model: ggml-model-whisper-tiny.en-q5_1.bin + files: + - filename: "ggml-model-whisper-tiny.en-q5_1.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q5_1.bin" +- !!merge <<: *whisper + name: "whisper-tiny-en" + overrides: + parameters: + model: ggml-model-whisper-tiny.en.bin + files: + - filename: "ggml-model-whisper-tiny.en.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en.bin" +- !!merge <<: *whisper + name: "whisper-tiny-en-q8_0" + overrides: + parameters: + model: ggml-model-whisper-tiny.en-q8_0.bin + files: + - filename: "ggml-model-whisper-tiny.en-q8_0.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin" ## Bert embeddings - url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master" name: "bert-embeddings" diff --git a/gallery/whisper-base.yaml b/gallery/whisper-base.yaml index 2dc24d6e..9d68c776 100644 --- a/gallery/whisper-base.yaml +++ b/gallery/whisper-base.yaml @@ -3,10 +3,3 @@ name: "whisper-base" config_file: | backend: whisper - parameters: - model: ggml-whisper-base.bin - -files: - - filename: "ggml-whisper-base.bin" - sha256: 
"60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe" - uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" From b99182c8d4424cb1f77bbc36628af432525ef7ee Mon Sep 17 00:00:00 2001 From: Chakib Benziane Date: Sat, 1 Jun 2024 20:26:27 +0200 Subject: [PATCH 60/80] TTS API improvements (#2308) * update doc on COQUI_LANGUAGE env variable Signed-off-by: blob42 * return errors from tts gRPC backend Signed-off-by: blob42 * handle speaker_id and language in coqui TTS backend Signed-off-by: blob42 * TTS endpoint: add optional language paramter Signed-off-by: blob42 * tts fix: empty language string breaks non-multilingual models Signed-off-by: blob42 * allow tts param definition in config file - consolidate TTS options under `tts` config entry Signed-off-by: blob42 * tts: update doc Signed-off-by: blob42 --------- Signed-off-by: blob42 Co-authored-by: Ettore Di Giacinto --- Makefile | 2 +- backend/backend.proto | 1 + backend/python/coqui/backend.py | 16 ++- core/backend/tts.go | 17 ++- core/cli/tts.go | 3 +- core/config/backend_config.go | 13 ++- core/http/endpoints/elevenlabs/tts.go | 2 +- core/http/endpoints/localai/tts.go | 22 +++- core/schema/localai.go | 120 ++++++++++---------- docs/content/docs/features/text-to-audio.md | 48 ++++++-- 10 files changed, 166 insertions(+), 78 deletions(-) diff --git a/Makefile b/Makefile index 20a5f2e0..71ce394f 100644 --- a/Makefile +++ b/Makefile @@ -447,7 +447,7 @@ protogen-clean: protogen-go-clean protogen-python-clean .PHONY: protogen-go protogen-go: mkdir -p pkg/grpc/proto - protoc -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \ + protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \ backend/backend.proto .PHONY: protogen-go-clean diff --git a/backend/backend.proto b/backend/backend.proto index cb87fe02..aec0c00e 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -266,6 +266,7 @@ message TTSRequest { string model = 2; string dst = 3; string voice = 4; + optional string language = 5; } message TokenizationResponse { diff --git a/backend/python/coqui/backend.py b/backend/python/coqui/backend.py index c6432208..02ab56f4 100644 --- a/backend/python/coqui/backend.py +++ b/backend/python/coqui/backend.py @@ -66,7 +66,21 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): def TTS(self, request, context): try: - self.tts.tts_to_file(text=request.text, speaker_wav=self.AudioPath, language=COQUI_LANGUAGE, file_path=request.dst) + # if model is multilangual add language from request or env as fallback + lang = request.language or COQUI_LANGUAGE + if lang == "": + lang = None + if self.tts.is_multi_lingual and lang is None: + return backend_pb2.Result(success=False, message=f"Model is multi-lingual, but no language was provided") + + # if model is multi-speaker, use speaker_wav or the speaker_id from request.voice + if self.tts.is_multi_speaker and self.AudioPath is None and request.voice is None: + return backend_pb2.Result(success=False, message=f"Model is multi-speaker, but no speaker was provided") + + if self.tts.is_multi_speaker and request.voice is not None: + self.tts.tts_to_file(text=request.text, speaker=request.voice, language=lang, file_path=request.dst) + else: + self.tts.tts_to_file(text=request.text, speaker_wav=self.AudioPath, language=lang, file_path=request.dst) except Exception as 
err: return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") return backend_pb2.Result(success=True) diff --git a/core/backend/tts.go b/core/backend/tts.go index 4532cf00..b1c23ebb 100644 --- a/core/backend/tts.go +++ b/core/backend/tts.go @@ -29,7 +29,16 @@ func generateUniqueFileName(dir, baseName, ext string) string { } } -func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (string, *proto.Result, error) { +func ModelTTS( + backend, + text, + modelFile, + voice , + language string, + loader *model.ModelLoader, + appConfig *config.ApplicationConfig, + backendConfig config.BackendConfig, +) (string, *proto.Result, error) { bb := backend if bb == "" { bb = model.PiperBackend @@ -83,7 +92,13 @@ func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader, Model: modelPath, Voice: voice, Dst: filePath, + Language: &language, }) + // return RPC error if any + if !res.Success { + return "", nil, fmt.Errorf(res.Message) + } + return filePath, res, err } diff --git a/core/cli/tts.go b/core/cli/tts.go index 8b54ed28..cbba0fc5 100644 --- a/core/cli/tts.go +++ b/core/cli/tts.go @@ -20,6 +20,7 @@ type TTSCMD struct { Backend string `short:"b" default:"piper" help:"Backend to run the TTS model"` Model string `short:"m" required:"" help:"Model name to run the TTS"` Voice string `short:"v" help:"Voice name to run the TTS"` + Language string `short:"l" help:"Language to use with the TTS"` OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"` ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"` @@ -52,7 +53,7 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error { options := config.BackendConfig{} options.SetDefaults() - filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, ml, opts, options) + filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, t.Language, ml, opts, options) if err != nil { return err } diff --git a/core/config/backend_config.go b/core/config/backend_config.go index eda66360..1ca11716 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -15,6 +15,15 @@ const ( RAND_SEED = -1 ) +type TTSConfig struct { + + // Voice wav path or id + Voice string `yaml:"voice"` + + // Vall-e-x + VallE VallE `yaml:"vall-e"` +} + type BackendConfig struct { schema.PredictionOptions `yaml:"parameters"` Name string `yaml:"name"` @@ -49,8 +58,8 @@ type BackendConfig struct { // GRPC Options GRPC GRPC `yaml:"grpc"` - // Vall-e-x - VallE VallE `yaml:"vall-e"` + // TTS specifics + TTSConfig `yaml:"tts"` // CUDA // Explicitly enable CUDA or not (some backends might need it) diff --git a/core/http/endpoints/elevenlabs/tts.go b/core/http/endpoints/elevenlabs/tts.go index 841f9b5f..e7bfe0f7 100644 --- a/core/http/endpoints/elevenlabs/tts.go +++ b/core/http/endpoints/elevenlabs/tts.go @@ -52,7 +52,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi } log.Debug().Msgf("Request for model: %s", modelFile) - filePath, _, err := backend.ModelTTS(cfg.Backend, input.Text, modelFile, voiceID, ml, 
appConfig, *cfg) + filePath, _, err := backend.ModelTTS(cfg.Backend, input.Text, modelFile, "", voiceID, ml, appConfig, *cfg) if err != nil { return err } diff --git a/core/http/endpoints/localai/tts.go b/core/http/endpoints/localai/tts.go index 7822e024..4e5a1b5b 100644 --- a/core/http/endpoints/localai/tts.go +++ b/core/http/endpoints/localai/tts.go @@ -12,10 +12,13 @@ import ( ) // TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech -// @Summary Generates audio from the input text. -// @Param request body schema.TTSRequest true "query params" -// @Success 200 {string} binary "Response" -// @Router /v1/audio/speech [post] +// @Summary Generates audio from the input text. +// @Accept json +// @Produce audio/x-wav +// @Param request body schema.TTSRequest true "query params" +// @Success 200 {string} binary "generated audio/wav file" +// @Router /v1/audio/speech [post] +// @Router /tts [post] func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { @@ -40,6 +43,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi ) if err != nil { + log.Err(err) modelFile = input.Model log.Warn().Msgf("Model not found in context: %s", input.Model) } else { @@ -51,7 +55,15 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi cfg.Backend = input.Backend } - filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, input.Voice, ml, appConfig, *cfg) + if input.Language != "" { + cfg.Language = input.Language + } + + if input.Voice != "" { + cfg.Voice = input.Voice + } + + filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, cfg.Voice, cfg.Language, ml, appConfig, *cfg) if err != nil { return err } diff --git a/core/schema/localai.go b/core/schema/localai.go index e9b61cf3..9bbfe28b 100644 --- a/core/schema/localai.go +++ b/core/schema/localai.go @@ -1,59 +1,61 @@ -package schema - -import ( - gopsutil "github.com/shirou/gopsutil/v3/process" -) - -type BackendMonitorRequest struct { - Model string `json:"model" yaml:"model"` -} - -type BackendMonitorResponse struct { - MemoryInfo *gopsutil.MemoryInfoStat - MemoryPercent float32 - CPUPercent float64 -} - -type TTSRequest struct { - Model string `json:"model" yaml:"model"` - Input string `json:"input" yaml:"input"` - Voice string `json:"voice" yaml:"voice"` - Backend string `json:"backend" yaml:"backend"` -} - -type StoresSet struct { - Store string `json:"store,omitempty" yaml:"store,omitempty"` - - Keys [][]float32 `json:"keys" yaml:"keys"` - Values []string `json:"values" yaml:"values"` -} - -type StoresDelete struct { - Store string `json:"store,omitempty" yaml:"store,omitempty"` - - Keys [][]float32 `json:"keys"` -} - -type StoresGet struct { - Store string `json:"store,omitempty" yaml:"store,omitempty"` - - Keys [][]float32 `json:"keys" yaml:"keys"` -} - -type StoresGetResponse struct { - Keys [][]float32 `json:"keys" yaml:"keys"` - Values []string `json:"values" yaml:"values"` -} - -type StoresFind struct { - Store string `json:"store,omitempty" yaml:"store,omitempty"` - - Key []float32 `json:"key" yaml:"key"` - Topk int `json:"topk" yaml:"topk"` -} - -type StoresFindResponse struct { - Keys [][]float32 `json:"keys" yaml:"keys"` - Values []string `json:"values" yaml:"values"` - Similarities []float32 `json:"similarities" yaml:"similarities"` -} +package schema + +import ( + gopsutil 
"github.com/shirou/gopsutil/v3/process" +) + +type BackendMonitorRequest struct { + Model string `json:"model" yaml:"model"` +} + +type BackendMonitorResponse struct { + MemoryInfo *gopsutil.MemoryInfoStat + MemoryPercent float32 + CPUPercent float64 +} + +// @Description TTS request body +type TTSRequest struct { + Model string `json:"model" yaml:"model"` // model name or full path + Input string `json:"input" yaml:"input"` // text input + Voice string `json:"voice" yaml:"voice"` // voice audio file or speaker id + Backend string `json:"backend" yaml:"backend"` + Language string `json:"language,omitempty" yaml:"language,omitempty"` // (optional) language to use with TTS model +} + +type StoresSet struct { + Store string `json:"store,omitempty" yaml:"store,omitempty"` + + Keys [][]float32 `json:"keys" yaml:"keys"` + Values []string `json:"values" yaml:"values"` +} + +type StoresDelete struct { + Store string `json:"store,omitempty" yaml:"store,omitempty"` + + Keys [][]float32 `json:"keys"` +} + +type StoresGet struct { + Store string `json:"store,omitempty" yaml:"store,omitempty"` + + Keys [][]float32 `json:"keys" yaml:"keys"` +} + +type StoresGetResponse struct { + Keys [][]float32 `json:"keys" yaml:"keys"` + Values []string `json:"values" yaml:"values"` +} + +type StoresFind struct { + Store string `json:"store,omitempty" yaml:"store,omitempty"` + + Key []float32 `json:"key" yaml:"key"` + Topk int `json:"topk" yaml:"topk"` +} + +type StoresFindResponse struct { + Keys [][]float32 `json:"keys" yaml:"keys"` + Values []string `json:"values" yaml:"values"` + Similarities []float32 `json:"similarities" yaml:"similarities"` +} diff --git a/docs/content/docs/features/text-to-audio.md b/docs/content/docs/features/text-to-audio.md index ebfdda1d..0e82f7f0 100644 --- a/docs/content/docs/features/text-to-audio.md +++ b/docs/content/docs/features/text-to-audio.md @@ -46,6 +46,10 @@ Coqui works without any configuration, to test it, you can run the following cur }' ``` +You can use the env variable COQUI_LANGUAGE to set the language used by the coqui backend. + +You can also use config files to configure tts models (see section below on how to use config files). + ### Bark [Bark](https://github.com/suno-ai/bark) allows to generate audio from text prompts. @@ -148,11 +152,12 @@ name: cloned-voice backend: vall-e-x parameters: model: "cloned-voice" -vall-e: - # The path to the audio file to be cloned - # relative to the models directory - # Max 15s - audio_path: "audio-sample.wav" +tts: + vall-e: + # The path to the audio file to be cloned + # relative to the models directory + # Max 15s + audio_path: "audio-sample.wav" ``` Then you can specify the model name in the requests: @@ -164,6 +169,35 @@ curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ }' | aplay ``` -## Parler-tts +### Parler-tts -`parler-tts`. It is possible to install and configure the model directly from the gallery. https://github.com/huggingface/parler-tts \ No newline at end of file +`parler-tts`. It is possible to install and configure the model directly from the gallery. https://github.com/huggingface/parler-tts + + +## Using config files + +You can also use a `config-file` to specify TTS models and their parameters. + +In the following example we define a custom config to load the `xtts_v2` model, and specify a voice and language. 
+ +```yaml + +name: xtts_v2 +backend: coqui +parameters: + language: fr + model: tts_models/multilingual/multi-dataset/xtts_v2 + +tts: + voice: Ana Florence +``` + +With this config, you can now use the following curl command to generate a text-to-speech audio file: +```bash +curl -L http://localhost:8080/tts \ + -H "Content-Type: application/json" \ + -d '{ +"model": "xtts_v2", +"input": "Bonjour, je suis Ana Florence. Comment puis-je vous aider?" +}' | aplay +``` From fb0f188c93043a487438935dd2edc451b8416b06 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 2 Jun 2024 00:04:01 +0200 Subject: [PATCH 61/80] feat(swagger): update swagger (#2464) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- swagger/docs.go | 46 +++++++++++++++++++++++++++++++++++++++++++- swagger/swagger.json | 46 +++++++++++++++++++++++++++++++++++++++++++- swagger/swagger.yaml | 32 +++++++++++++++++++++++++++++- 3 files changed, 121 insertions(+), 3 deletions(-) diff --git a/swagger/docs.go b/swagger/docs.go index 29e04af6..f48b9661 100644 --- a/swagger/docs.go +++ b/swagger/docs.go @@ -22,6 +22,36 @@ const docTemplate = `{ "host": "{{.Host}}", "basePath": "{{.BasePath}}", "paths": { + "/tts": { + "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "audio/x-wav" + ], + "summary": "Generates audio from the input text.", + "parameters": [ + { + "description": "query params", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.TTSRequest" + } + } + ], + "responses": { + "200": { + "description": "generated audio/wav file", + "schema": { + "type": "string" + } + } + } + } + }, "/v1/assistants": { "post": { "summary": "Create an assistant with a model and instructions.", @@ -48,6 +78,12 @@ const docTemplate = `{ }, "/v1/audio/speech": { "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "audio/x-wav" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -62,7 +98,7 @@ const docTemplate = `{ ], "responses": { "200": { - "description": "Response", + "description": "generated audio/wav file", "schema": { "type": "string" } @@ -771,18 +807,26 @@ const docTemplate = `{ } }, "schema.TTSRequest": { + "description": "TTS request body", "type": "object", "properties": { "backend": { "type": "string" }, "input": { + "description": "text input", + "type": "string" + }, + "language": { + "description": "(optional) language to use with TTS model", "type": "string" }, "model": { + "description": "model name or full path", "type": "string" }, "voice": { + "description": "voice audio file or speaker id", "type": "string" } } diff --git a/swagger/swagger.json b/swagger/swagger.json index 1933da3a..1eba0ff3 100644 --- a/swagger/swagger.json +++ b/swagger/swagger.json @@ -15,6 +15,36 @@ }, "basePath": "/", "paths": { + "/tts": { + "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "audio/x-wav" + ], + "summary": "Generates audio from the input text.", + "parameters": [ + { + "description": "query params", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.TTSRequest" + } + } + ], + "responses": { + "200": { + "description": "generated audio/wav file", + "schema": { + "type": "string" + } + } + } + } + }, "/v1/assistants": { "post": { "summary": "Create an assistant with a model and instructions.", 
@@ -41,6 +71,12 @@ }, "/v1/audio/speech": { "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "audio/x-wav" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -55,7 +91,7 @@ ], "responses": { "200": { - "description": "Response", + "description": "generated audio/wav file", "schema": { "type": "string" } @@ -764,18 +800,26 @@ } }, "schema.TTSRequest": { + "description": "TTS request body", "type": "object", "properties": { "backend": { "type": "string" }, "input": { + "description": "text input", + "type": "string" + }, + "language": { + "description": "(optional) language to use with TTS model", "type": "string" }, "model": { + "description": "model name or full path", "type": "string" }, "voice": { + "description": "voice audio file or speaker id", "type": "string" } } diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml index 33ce0b78..db4ef52f 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -367,14 +367,21 @@ definitions: type: integer type: object schema.TTSRequest: + description: TTS request body properties: backend: type: string input: + description: text input + type: string + language: + description: (optional) language to use with TTS model type: string model: + description: model name or full path type: string voice: + description: voice audio file or speaker id type: string type: object schema.ToolCall: @@ -399,6 +406,25 @@ info: title: LocalAI API version: 2.0.0 paths: + /tts: + post: + consumes: + - application/json + parameters: + - description: query params + in: body + name: request + required: true + schema: + $ref: '#/definitions/schema.TTSRequest' + produces: + - audio/x-wav + responses: + "200": + description: generated audio/wav file + schema: + type: string + summary: Generates audio from the input text. /v1/assistants: post: parameters: @@ -416,6 +442,8 @@ paths: summary: Create an assistant with a model and instructions. /v1/audio/speech: post: + consumes: + - application/json parameters: - description: query params in: body @@ -423,9 +451,11 @@ paths: required: true schema: $ref: '#/definitions/schema.TTSRequest' + produces: + - audio/x-wav responses: "200": - description: Response + description: generated audio/wav file schema: type: string summary: Generates audio from the input text. 
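Taken together, the swagger update above and the TTS patch before it define the full JSON contract for speech generation. As a rough illustration of how a client might consume it, here is a Go sketch: the routes, request fields, and audio/x-wav response type come from the swagger definitions, while the server address and the model name are assumptions taken from the documentation examples earlier in the series, not from this patch.

```go
package main

import (
	"bytes"
	"encoding/json"
	"io"
	"log"
	"net/http"
	"os"
)

// TTSRequest loosely mirrors schema.TTSRequest from the patch above;
// omitempty is added here for brevity and is not part of the original tags.
type TTSRequest struct {
	Model    string `json:"model"`
	Input    string `json:"input"`
	Voice    string `json:"voice,omitempty"`
	Backend  string `json:"backend,omitempty"`
	Language string `json:"language,omitempty"`
}

func main() {
	body, err := json.Marshal(TTSRequest{
		Model:    "xtts_v2", // hypothetical model name, borrowed from the docs example
		Input:    "Hello from LocalAI",
		Language: "en",
	})
	if err != nil {
		log.Fatal(err)
	}

	// localhost:8080 is an assumption; the docs examples use the same address.
	resp, err := http.Post("http://localhost:8080/v1/audio/speech", "application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	wav, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}
	if err := os.WriteFile("out.wav", wav, 0o644); err != nil {
		log.Fatal(err)
	}
}
```

The same request body also works against the `/tts` route, which the swagger update documents with the same schema.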
From b588cae70efb6ba644d49a074a2c34fc1cb156e1 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 2 Jun 2024 00:31:32 +0200 Subject: [PATCH 62/80] :arrow_up: Update ggerganov/llama.cpp (#2465) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 71ce394f..a8df4e43 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=a323ec60af14a33d560df98f2cc41b4112cb4f80 +CPPLLAMA_VERSION?=2e666832e6ac78194edf030bd1c295e21bdb022c # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From c9092ad39c627ba6e5b085da45eb67233a0f9938 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 2 Jun 2024 01:13:02 +0200 Subject: [PATCH 63/80] models(gallery): :arrow_up: update checksum (#2463) :arrow_up: Checksum updates in gallery/index.yaml Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- gallery/index.yaml | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index bc6bb281..89947341 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2074,8 +2074,8 @@ - filename: DreamShaper_8_pruned.safetensors uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors sha256: 879db523c30d3b9017143d56705015e15a2cb5628762c11d086fed9538abd7fd -## Whisper - &whisper + ## Whisper url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" name: "whisper-1" license: "MIT" @@ -2098,8 +2098,8 @@ model: ggml-model-whisper-base-q5_1.bin files: - filename: "ggml-model-whisper-base-q5_1.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-base-q5_1.bin" + sha256: 422f1ae452ade6f30a004d7e5c6a43195e4433bc370bf23fac9cc591f01a8898 - !!merge <<: *whisper name: "whisper-base" overrides: @@ -2107,8 +2107,8 @@ model: ggml-model-whisper-base.bin files: - filename: "ggml-model-whisper-base.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-base.bin" + sha256: 60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe - !!merge <<: *whisper name: "whisper-base-en-q5_1" overrides: @@ -2116,8 +2116,8 @@ model: ggml-model-whisper-base.en-q5_1.bin files: - filename: "ggml-model-whisper-base.en-q5_1.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-base.en-q5_1.bin" + sha256: 4baf70dd0d7c4247ba2b81fafd9c01005ac77c2f9ef064e00dcf195d0e2fdd2f - !!merge <<: *whisper name: "whisper-base-en" overrides: @@ -2125,8 +2125,8 @@ model: ggml-model-whisper-base.en.bin files: - filename: "ggml-model-whisper-base.en.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-base.en.bin" + sha256: a03779c86df3323075f5e796cb2ce5029f00ec8869eee3fdfb897afe36c6d002 - !!merge <<: *whisper name: "whisper-large-q5_0" overrides: @@ -2134,8 +2134,8 @@ model: ggml-model-whisper-large-q5_0.bin files: - filename: "ggml-model-whisper-large-q5_0.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-large-q5_0.bin" + sha256: 3a214837221e4530dbc1fe8d734f302af393eb30bd0ed046042ebf4baf70f6f2 - !!merge <<: *whisper name: 
"whisper-medium-q5_0" overrides: @@ -2143,8 +2143,8 @@ model: ggml-model-whisper-medium-q5_0.bin files: - filename: "ggml-model-whisper-medium-q5_0.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-medium-q5_0.bin" + sha256: 19fea4b380c3a618ec4723c3eef2eb785ffba0d0538cf43f8f235e7b3b34220f - !!merge <<: *whisper name: "whisper-small-q5_1" overrides: @@ -2152,8 +2152,8 @@ model: ggml-model-whisper-small-q5_1.bin files: - filename: "ggml-model-whisper-small-q5_1.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-small-q5_1.bin" + sha256: ae85e4a935d7a567bd102fe55afc16bb595bdb618e11b2fc7591bc08120411bb - !!merge <<: *whisper name: "whisper-small" overrides: @@ -2161,8 +2161,8 @@ model: ggml-model-whisper-small.bin files: - filename: "ggml-model-whisper-small.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-small.bin" + sha256: 1be3a9b2063867b937e64e2ec7483364a79917e157fa98c5d94b5c1fffea987b - !!merge <<: *whisper name: "whisper-small-en-q5_1" overrides: @@ -2170,8 +2170,8 @@ model: ggml-model-whisper-small.en-q5_1.bin files: - filename: "ggml-model-whisper-small.en-q5_1.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-small.en-q5_1.bin" + sha256: bfdff4894dcb76bbf647d56263ea2a96645423f1669176f4844a1bf8e478ad30 - !!merge <<: *whisper name: "whisper-small" overrides: @@ -2179,8 +2179,8 @@ model: ggml-model-whisper-small.en.bin files: - filename: "ggml-model-whisper-small.en.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-small.en.bin" + sha256: c6138d6d58ecc8322097e0f987c32f1be8bb0a18532a3f88f734d1bbf9c41e5d - !!merge <<: *whisper name: "whisper-small-q5_1" overrides: @@ -2188,8 +2188,8 @@ model: ggml-model-whisper-small-q5_1.bin files: - filename: "ggml-model-whisper-small-q5_1.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-small-q5_1.bin" + sha256: ae85e4a935d7a567bd102fe55afc16bb595bdb618e11b2fc7591bc08120411bb - !!merge <<: *whisper name: "whisper-tiny" overrides: @@ -2197,8 +2197,8 @@ model: ggml-model-whisper-tiny.bin files: - filename: "ggml-model-whisper-tiny.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.bin" + sha256: be07e048e1e599ad46341c8d2a135645097a538221678b7acdd1b1919c6e1b21 - !!merge <<: *whisper name: "whisper-tiny-q5_1" overrides: @@ -2206,8 +2206,8 @@ model: ggml-model-whisper-tiny-q5_1.bin files: - filename: "ggml-model-whisper-tiny-q5_1.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny-q5_1.bin" + sha256: 818710568da3ca15689e31a743197b520007872ff9576237bda97bd1b469c3d7 - !!merge <<: *whisper name: "whisper-tiny-en-q5_1" overrides: @@ -2215,8 +2215,8 @@ model: ggml-model-whisper-tiny.en-q5_1.bin files: - filename: "ggml-model-whisper-tiny.en-q5_1.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q5_1.bin" + sha256: c77c5766f1cef09b6b7d47f21b546cbddd4157886b3b5d6d4f709e91e66c7c2b - !!merge <<: *whisper name: "whisper-tiny-en" overrides: @@ -2224,8 +2224,8 @@ model: ggml-model-whisper-tiny.en.bin files: - filename: "ggml-model-whisper-tiny.en.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en.bin" + sha256: 921e4cf8686fdd993dcd081a5da5b6c365bfde1162e72b08d75ac75289920b1f - !!merge <<: *whisper name: "whisper-tiny-en-q8_0" overrides: @@ -2233,8 +2233,8 @@ model: ggml-model-whisper-tiny.en-q8_0.bin files: - filename: "ggml-model-whisper-tiny.en-q8_0.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin" + sha256: 
5bc2b3860aa151a4c6e7bb095e1fcce7cf12c7b020ca08dcec0c6d018bb7dd94 ## Bert embeddings - url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master" name: "bert-embeddings" From c0744899c9708c41b4d6b6f78c2a30f79d5e49a8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 2 Jun 2024 01:15:06 +0200 Subject: [PATCH 64/80] models(gallery): add gemma-2b (#2466) Signed-off-by: Ettore Di Giacinto --- gallery/gemma.yaml | 20 ++++++++++++++++++++ gallery/index.yaml | 22 ++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 gallery/gemma.yaml diff --git a/gallery/gemma.yaml b/gallery/gemma.yaml new file mode 100644 index 00000000..30b49a07 --- /dev/null +++ b/gallery/gemma.yaml @@ -0,0 +1,20 @@ +--- +name: "gemma" + +config_file: | + mmap: true + context_size: 8192 + template: + chat_message: |- + <start_of_turn>{{if eq .RoleName "assistant" }}model{{else}}{{ .RoleName }}{{end}} + {{ if .Content -}} + {{.Content }} + {{ end -}} + chat: | + {{.Input -}} + <start_of_turn>model + completion: | + {{.Input}} + stopwords: + - '<|im_end|>' + - '<end_of_turn>' diff --git a/gallery/index.yaml b/gallery/index.yaml index 89947341..5997c074 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -131,6 +131,28 @@ - filename: Einstein-v6.1-Llama3-8B-Q4_K_M.gguf sha256: 447587bd8f60d9050232148d34fdb2d88b15b2413fd7f8e095a4606ec60b45bf uri: huggingface://bartowski/Einstein-v6.1-Llama3-8B-GGUF/Einstein-v6.1-Llama3-8B-Q4_K_M.gguf +- &gemma + url: "github:mudler/LocalAI/gallery/gemma.yaml@master" + name: "gemma-2b" + license: gemma + urls: + - https://ai.google.dev/gemma/docs + - https://huggingface.co/mlabonne/gemma-2b-GGUF + description: | + Open source LLM from Google + tags: + - llm + - gguf + - gpu + - cpu + - gemma + overrides: + parameters: + model: gemma-2b.Q4_K_M.gguf + files: + - filename: gemma-2b.Q4_K_M.gguf + sha256: 37d50c21ef7847926204ad9b3007127d9a2722188cfd240ce7f9f7f041aa71a5 + uri: huggingface://mlabonne/gemma-2b-GGUF/gemma-2b.Q4_K_M.gguf - &llama3 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png From 29ff51c12ab754d80206a6d1b63fc18aba320dc6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 2 Jun 2024 01:26:41 +0200 Subject: [PATCH 65/80] Update gemma stopwords Signed-off-by: Ettore Di Giacinto --- gallery/gemma.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/gallery/gemma.yaml b/gallery/gemma.yaml index 30b49a07..31b0f47a 100644 --- a/gallery/gemma.yaml +++ b/gallery/gemma.yaml @@ -18,3 +18,4 @@ config_file: | stopwords: - '<|im_end|>' - '<end_of_turn>' + - '<start_of_turn>' From 77d752a481977550b7a9dd35801c09753fa7a82d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 2 Jun 2024 10:51:58 +0200 Subject: [PATCH 66/80] fix(gemma): correctly format the template Signed-off-by: Ettore Di Giacinto --- gallery/gemma.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gallery/gemma.yaml b/gallery/gemma.yaml index 31b0f47a..bff7d614 100644 --- a/gallery/gemma.yaml +++ b/gallery/gemma.yaml @@ -8,10 +8,10 @@ config_file: | chat_message: |- <start_of_turn>{{if eq .RoleName "assistant" }}model{{else}}{{ .RoleName }}{{end}} {{ if .Content -}} - {{.Content }} + {{.Content -}} {{ end -}} chat: | - {{.Input -}} + {{.Input }} <start_of_turn>model completion: | {{.Input}} From 5ddaa19914e7c33b7023c565462bc0fe57b8029a Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 2 Jun 2024 23:34:29 +0200 Subject: [PATCH 67/80] :arrow_up: Update
ggerganov/llama.cpp (#2467) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a8df4e43..89a55e6d 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=2e666832e6ac78194edf030bd1c295e21bdb022c +CPPLLAMA_VERSION?=7c4e5b7eae26581869e782015d9deca947c34997 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 4a239a4bff9036cd5fd24cb5492a6ec384a87531 Mon Sep 17 00:00:00 2001 From: fakezeta Date: Mon, 3 Jun 2024 08:52:55 +0200 Subject: [PATCH 68/80] feat(transformers): various enhancements to the transformers backend (#2468) update transformers *Handle Temperature = 0 as greedy search *Handle custom words as stop words *Implement KV cache *Phi 3 no longer requires trust_remote_code: true --- backend/python/transformers/backend.py | 59 ++++++++++++++++---------- 1 file changed, 36 insertions(+), 23 deletions(-) mode change 100755 => 100644 backend/python/transformers/backend.py diff --git a/backend/python/transformers/backend.py b/backend/python/transformers/backend.py old mode 100755 new mode 100644 index b1e0d559..10603d2e --- a/backend/python/transformers/backend.py +++ b/backend/python/transformers/backend.py @@ -22,9 +22,9 @@ import torch.cuda XPU=os.environ.get("XPU", "0") == "1" if XPU: - from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer + from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria else: - from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig, TextIteratorStreamer + from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -246,28 +246,28 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): # Pool to get sentence embeddings; i.e.
generate one 1024 vector for the entire sentence sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask']) -# print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr) -# print("Embeddings:", sentence_embeddings, file=sys.stderr) return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings[0]) async def _predict(self, request, context, streaming=False): set_seed(request.Seed) - if request.TopP == 0: - request.TopP = 0.9 + if request.TopP < 0 or request.TopP > 1: + request.TopP = 1 - if request.TopK == 0: - request.TopK = 40 + if request.TopK <= 0: + request.TopK = 50 + + if request.Temperature > 0 : + sample=True + else: + sample=False + request.TopP == None + request.TopK == None + request.Temperature == None prompt = request.Prompt if not request.Prompt and request.UseTokenizerTemplate and request.Messages: prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True) - eos_token_id = self.tokenizer.eos_token_id - if request.StopPrompts: - eos_token_id = [] - for word in request.StopPrompts: - eos_token_id.append(self.tokenizer.convert_tokens_to_ids(word)) - inputs = self.tokenizer(prompt, return_tensors="pt") if request.Tokens > 0: @@ -281,6 +281,14 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): inputs = inputs.to("xpu") streaming = False + criteria=[] + if request.StopPrompts: + criteria = StoppingCriteriaList( + [ + StopStringCriteria(tokenizer=self.tokenizer, stop_strings=request.StopPrompts), + ] + ) + if streaming: streamer=TextIteratorStreamer(self.tokenizer, skip_prompt=True, @@ -290,11 +298,14 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): temperature=request.Temperature, top_p=request.TopP, top_k=request.TopK, - do_sample=True, + do_sample=sample, attention_mask=inputs["attention_mask"], - eos_token_id=eos_token_id, + eos_token_id=self.tokenizer.eos_token_id, pad_token_id=self.tokenizer.eos_token_id, - streamer=streamer) + streamer=streamer, + stopping_criteria=criteria, + use_cache=True, + ) thread=Thread(target=self.model.generate, kwargs=config) thread.start() generated_text = "" @@ -311,18 +322,20 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): temperature=request.Temperature, top_p=request.TopP, top_k=request.TopK, - do_sample=True, + do_sample=sample, pad_token=self.tokenizer.eos_token_id) else: - outputs = self.model.generate(inputs["input_ids"], + outputs = self.model.generate(**inputs, max_new_tokens=max_tokens, temperature=request.Temperature, top_p=request.TopP, top_k=request.TopK, - do_sample=True, - attention_mask=inputs["attention_mask"], - eos_token_id=eos_token_id, - pad_token_id=self.tokenizer.eos_token_id) + do_sample=sample, + eos_token_id=self.tokenizer.eos_token_id, + pad_token_id=self.tokenizer.eos_token_id, + stopping_criteria=criteria, + use_cache=True, + ) generated_text = self.tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0] if streaming: From 90945ebab3c21eed5b5087eef3646bff5eafefcc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 3 Jun 2024 15:44:27 +0200 Subject: [PATCH 69/80] models(gallery): add fimbulvetr iqmatrix version (#2470) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 5997c074..0978360d 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1020,6 +1020,15 @@ - filename: Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf sha256: 
3597dacfb0ab717d565d8a4d6067f10dcb0e26cc7f21c832af1a10a87882a8fd uri: huggingface://Sao10K/Fimbulvetr-11B-v2-GGUF/Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf +- !!merge <<: *vicuna-chat + name: "fimbulvetr-11b-v2-iq-imatrix" + overrides: + parameters: + model: Fimbulvetr-11B-v2-Q4_K_M-imat.gguf + files: + - filename: Fimbulvetr-11B-v2-Q4_K_M-imat.gguf + sha256: 3f309b59508342536a70edd6c4be6cf4f2cb97f2e32cbc79ad2ab3f4c02933a4 + uri: huggingface://Lewdiculous/Fimbulvetr-11B-v2-GGUF-IQ-Imatrix/Fimbulvetr-11B-v2-Q4_K_M-imat.gguf - &noromaid ### Start noromaid url: "github:mudler/LocalAI/gallery/noromaid.yaml@master" From bae2a649fd2dc55717449ff19154cefbd8bb2916 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 3 Jun 2024 15:44:52 +0200 Subject: [PATCH 70/80] models(gallery): add new poppy porpoise versions (#2471) models(gallery): add new poppy porpoise versions Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 56 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 0978360d..172d1bab 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1348,6 +1348,62 @@ - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf +- !!merge <<: *llama3 + name: "poppy_porpoise-v1.30-l3-8b-iq-imatrix" + urls: + - https://huggingface.co/mradermacher/Poppy_Porpoise-1.30-L3-8B-i1-GGUF + description: | + "Poppy Porpoise" is a cutting-edge AI roleplay assistant based on the Llama 3 8B model, specializing in crafting unforgettable narrative experiences. With its advanced language capabilities, Poppy expertly immerses users in an interactive and engaging adventure, tailoring each adventure to their individual preferences. + + Update: Vision/multimodal capabilities again! + icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/Boje781GkTdYgORTYGI6r.png + tags: + - llm + - multimodal + - gguf + - gpu + - llama3 + - cpu + - llava-1.5 + overrides: + mmproj: Llama-3-Update-2.0-mmproj-model-f16.gguf + parameters: + model: Poppy_Porpoise-1.30-L3-8B.i1-Q4_K_M.gguf + files: + - filename: Poppy_Porpoise-1.30-L3-8B.i1-Q4_K_M.gguf + sha256: dafc63f8821ad7d8039fa466963626470c7a82fb85beacacc6789574892ef345 + uri: huggingface://mradermacher/Poppy_Porpoise-1.30-L3-8B-i1-GGUF/Poppy_Porpoise-1.30-L3-8B.i1-Q4_K_M.gguf + - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf + sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba + uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf +- !!merge <<: *llama3 + name: "poppy_porpoise-v1.4-l3-8b-iq-imatrix" + urls: + - https://huggingface.co/mradermacher/Poppy_Porpoise-1.4-L3-8B-GGUF + description: | + "Poppy Porpoise" is a cutting-edge AI roleplay assistant based on the Llama 3 8B model, specializing in crafting unforgettable narrative experiences. With its advanced language capabilities, Poppy expertly immerses users in an interactive and engaging adventure, tailoring each adventure to their individual preferences. + + Update: Vision/multimodal capabilities again!
+ icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/Boje781GkTdYgORTYGI6r.png + tags: + - llm + - multimodal + - gguf + - gpu + - llama3 + - cpu + - llava-1.5 + overrides: + mmproj: Llama-3-Update-2.0-mmproj-model-f16.gguf + parameters: + model: Poppy_Porpoise-1.4-L3-8B.Q4_K_M.gguf + files: + - filename: Poppy_Porpoise-1.4-L3-8B.Q4_K_M.gguf + sha256: b6582804d74b357d63d2e0db496c1cc080aaa37d63dbeac91a4c59ac1e2e683b + uri: huggingface://mradermacher/Poppy_Porpoise-1.4-L3-8B-GGUF/Poppy_Porpoise-1.4-L3-8B.Q4_K_M.gguf + - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf + sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba + uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf - !!merge <<: *llama3 name: "bunny-llama-3-8b-v" urls: From 148adebe1695cfe8ceb2eca858f2ec462465b125 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 3 Jun 2024 16:58:53 +0200 Subject: [PATCH 71/80] docs: fix p2p commands (#2472) Also change icons on GPT vision page Signed-off-by: Ettore Di Giacinto --- docs/content/docs/features/distributed_inferencing.md | 4 ++-- docs/content/docs/features/gpt-vision.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/content/docs/features/distributed_inferencing.md b/docs/content/docs/features/distributed_inferencing.md index b3b84528..e7220a81 100644 --- a/docs/content/docs/features/distributed_inferencing.md +++ b/docs/content/docs/features/distributed_inferencing.md @@ -20,7 +20,7 @@ This functionality enables LocalAI to distribute inference requests across multi To start workers for distributing the computational load, run: ```bash -local-ai llamacpp-worker +local-ai worker llama-cpp-rpc ``` Alternatively, you can build the RPC server following the llama.cpp [README](https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md), which is compatible with LocalAI. @@ -71,7 +71,7 @@ To reuse the same token later, restart the server with `--p2ptoken` or `P2P_TOKE 2. Start the workers. 
Copy the `local-ai` binary to other hosts and run as many workers as needed using the token: ```bash -TOKEN=XXX ./local-ai p2p-llama-cpp-rpc +TOKEN=XXX ./local-ai worker p2p-llama-cpp-rpc # 1:06AM INF loading environment variables from file envFile=.env # 1:06AM INF Setting logging to info # {"level":"INFO","time":"2024-05-19T01:06:01.794+0200","caller":"config/config.go:288","message":"connmanager disabled\n"} diff --git a/docs/content/docs/features/gpt-vision.md b/docs/content/docs/features/gpt-vision.md index 9e021273..1fc4307f 100644 --- a/docs/content/docs/features/gpt-vision.md +++ b/docs/content/docs/features/gpt-vision.md @@ -1,7 +1,7 @@ +++ disableToc = false -title = "🆕 GPT Vision" +title = "🥽 GPT Vision" weight = 14 url = "/features/gpt-vision/" +++ From 34527737bb11995914ab08d224f07e4bc67d4be0 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 3 Jun 2024 17:07:26 +0200 Subject: [PATCH 72/80] feat(webui): enhance card visibility (#2473) Do not let the description text clutter the card, and highlight the model names Signed-off-by: Ettore Di Giacinto --- core/http/elements/gallery.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index 7ca34aef..c37cba31 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -243,13 +243,13 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri }, elem.H5( attrs.Props{ - "class": "mb-2 text-xl font-medium leading-tight", + "class": "mb-2 text-xl font-bold leading-tight", }, elem.Text(m.Name), ), elem.P( attrs.Props{ - "class": "mb-4 text-base", + "class": "mb-4 text-sm [&:not(:hover)]:truncate text-base", }, elem.Text(m.Description), ), From daa7544d9ce3e05a60d45eb2da4c29b03547c9d7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 3 Jun 2024 19:55:01 +0200 Subject: [PATCH 73/80] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index efaa685c..ba7617bc 100644 --- a/README.md +++ b/README.md @@ -183,7 +183,7 @@ And a huge shout-out to individuals sponsoring the project by donating hardware LocalAI is a community-driven project created by [Ettore Di Giacinto](https://github.com/mudler/). -MIT - Author Ettore Di Giacinto +MIT - Author Ettore Di Giacinto ## 🙇 Acknowledgements From 6ef78ef7f688583d14141f982bd181b10b7c8bb4 Mon Sep 17 00:00:00 2001 From: fakezeta Date: Mon, 3 Jun 2024 22:41:42 +0200 Subject: [PATCH 74/80] bugfix: CUDA acceleration not working (#2475) * bugfix: CUDA acceleration not working CUDA not working after #2286.
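The root cause: the backend decided whether CUDA was usable from the `request.CUDA` flag instead of probing the runtime, so CPU-only hosts could take the GPU code path and fail. A minimal sketch, assuming torch and transformers are installed, of the detection pattern this fix adopts (illustrative only; device selection in the real backend also honors `request.MainGPU`, as the diff below shows):

```python
import torch

# Probe the runtime instead of trusting a flag from the request:
# torch.cuda.is_available() is False whenever no NVIDIA driver/GPU is
# present, so device placement and quantization can be decided safely
# at model-load time.
cuda_available = torch.cuda.is_available()

if cuda_available:
    # Defer CUDA-only imports until a GPU is known to exist, mirroring
    # how the patch moves BitsAndBytesConfig/AutoModelForCausalLM inside
    # the `if self.CUDA:` branch.
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

device_map = "cuda:0" if cuda_available else "cpu"
print(f"loading model with device_map={device_map}")
```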
Refactored the code to be more polished * Update requirements.txt Missing imports Signed-off-by: fakezeta * Update requirements.txt Signed-off-by: fakezeta --------- Signed-off-by: fakezeta --- backend/python/transformers/backend.py | 13 ++++++------- backend/python/transformers/requirements.txt | 5 ++++- backend/python/transformers/run.sh | 6 ++++++ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/backend/python/transformers/backend.py b/backend/python/transformers/backend.py index 10603d2e..6e809f28 100644 --- a/backend/python/transformers/backend.py +++ b/backend/python/transformers/backend.py @@ -21,10 +21,7 @@ import torch.cuda XPU=os.environ.get("XPU", "0") == "1" -if XPU: - from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria -else: - from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria +from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -77,11 +74,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): """ model_name = request.Model - compute = "auto" + compute = torch.float16 if request.F16Memory == True: compute=torch.bfloat16 - self.CUDA = request.CUDA + self.CUDA = torch.cuda.is_available() self.OV=False device_map="cpu" @@ -89,6 +86,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): quantization = None if self.CUDA: + from transformers import BitsAndBytesConfig, AutoModelForCausalLM if request.MainGPU: device_map=request.MainGPU else: @@ -107,7 +105,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): bnb_4bit_compute_dtype = None, load_in_8bit=True, ) - + try: if request.Type == "AutoModelForCausalLM": if XPU: @@ -189,6 +187,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): device=device_map) self.OV = True else: + print("Automodel", file=sys.stderr) self.model = AutoModel.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode, use_safetensors=True, diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt index 5f4f4687..494a53fc 100644 --- a/backend/python/transformers/requirements.txt +++ b/backend/python/transformers/requirements.txt @@ -3,4 +3,7 @@ transformers grpcio==1.64.0 protobuf torch -certifi \ No newline at end of file +certifi +intel-extension-for-transformers +bitsandbytes +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 diff --git a/backend/python/transformers/run.sh b/backend/python/transformers/run.sh index 375c07e5..8ea92a27 100755 --- a/backend/python/transformers/run.sh +++ b/backend/python/transformers/run.sh @@ -1,4 +1,10 @@ #!/bin/bash source $(dirname $0)/../common/libbackend.sh +if [ -d "/opt/intel" ]; then + # Assumes we are using the Intel oneAPI container image + # https://github.com/intel/intel-extension-for-pytorch/issues/538 + export XPU=1 +fi + startBackend $@ \ No newline at end of file From 67aa31faad7a39989cda5e64dda4dc88156ab122 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 4 Jun 2024 01:09:24 +0200 Subject: [PATCH 75/80] :arrow_up: Update ggerganov/llama.cpp (#2477) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1
insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 89a55e6d..f2c03086 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=7c4e5b7eae26581869e782015d9deca947c34997 +CPPLLAMA_VERSION?=bde7cd3cd949c1a85d3a199498ac98e78039d46f # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 34ab442ce9ef2ef35e6cae4d5262b0210746d1c8 Mon Sep 17 00:00:00 2001 From: Dave Date: Tue, 4 Jun 2024 02:39:19 -0400 Subject: [PATCH 76/80] toil: bump grpc version (#2480) bump the grpc package version --------- Signed-off-by: Dave Lee --- go.mod | 35 +++++++--------------- go.sum | 92 +++++++++++++--------------------------------------------- 2 files changed, 30 insertions(+), 97 deletions(-) diff --git a/go.mod b/go.mod index c6f24e0c..690be3f1 100644 --- a/go.mod +++ b/go.mod @@ -8,10 +8,8 @@ require ( github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf github.com/Masterminds/sprig/v3 v3.2.3 github.com/alecthomas/kong v0.9.0 - github.com/census-instrumentation/opencensus-proto v0.4.1 github.com/charmbracelet/glamour v0.7.0 github.com/chasefleming/elem-go v0.25.0 - github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 github.com/donomii/go-rwkv.cpp v0.0.0-20240228065144-661e7ae26d44 github.com/elliotchance/orderedmap/v2 v2.2.0 github.com/fsnotify/fsnotify v1.7.0 @@ -22,8 +20,7 @@ require ( github.com/gofiber/fiber/v2 v2.52.4 github.com/gofiber/swagger v1.0.0 github.com/gofiber/template/html/v2 v2.1.1 - github.com/google/uuid v1.5.0 - github.com/grpc-ecosystem/grpc-gateway v1.16.0 + github.com/google/uuid v1.6.0 github.com/hpcloud/tail v1.0.0 github.com/imdario/mergo v0.3.16 github.com/ipfs/go-log v1.0.5 @@ -56,16 +53,13 @@ require ( go.opentelemetry.io/otel/exporters/prometheus v0.42.0 go.opentelemetry.io/otel/metric v1.19.0 go.opentelemetry.io/otel/sdk/metric v1.19.0 - google.golang.org/api v0.126.0 - google.golang.org/grpc v1.59.0 - google.golang.org/protobuf v1.33.0 + google.golang.org/grpc v1.64.0 + google.golang.org/protobuf v1.34.1 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 ) require ( - cloud.google.com/go/compute v1.23.0 // indirect - cloud.google.com/go/compute/metadata v0.2.3 // indirect github.com/benbjohnson/clock v1.3.5 // indirect github.com/c-robinson/iplib v1.0.8 // indirect github.com/containerd/cgroups v1.1.0 // indirect @@ -74,17 +68,12 @@ require ( github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c // indirect github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 // indirect github.com/elastic/gosigar v0.14.2 // indirect - github.com/envoyproxy/protoc-gen-validate v1.0.2 // indirect github.com/flynn/noise v1.0.0 // indirect github.com/francoispqt/gojay v1.2.13 // indirect github.com/godbus/dbus/v5 v5.1.0 // indirect - github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/mock v1.6.0 // indirect github.com/google/btree v1.1.2 // indirect github.com/google/gopacket v1.1.19 // indirect - github.com/google/s2a-go v0.1.4 // indirect - github.com/googleapis/enterprise-certificate-proxy v0.2.3 // indirect - github.com/googleapis/gax-go/v2 v2.11.0 // indirect github.com/gorilla/websocket v1.5.0 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect @@ -153,16 +142,12 @@ require ( go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 // 
indirect - golang.org/x/oauth2 v0.11.0 // indirect golang.org/x/sync v0.6.0 // indirect - golang.org/x/sys v0.19.0 // indirect + golang.org/x/sys v0.20.0 // indirect golang.zx2c4.com/wintun v0.0.0-20211104114900-415007cec224 // indirect golang.zx2c4.com/wireguard v0.0.0-20220703234212-c31a7b1ab478 // indirect golang.zx2c4.com/wireguard/windows v0.5.3 // indirect gonum.org/v1/gonum v0.13.0 // indirect - google.golang.org/appengine v1.6.7 // indirect - google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d // indirect lukechampine.com/blake3 v1.2.1 // indirect ) @@ -204,7 +189,7 @@ require ( github.com/gofiber/template v1.8.3 // indirect github.com/gofiber/utils v1.1.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect - github.com/golang/protobuf v1.5.3 + github.com/golang/protobuf v1.5.4 // indirect github.com/golang/snappy v0.0.2 // indirect github.com/google/go-cmp v0.6.0 // indirect github.com/google/pprof v0.0.0-20230821062121-407c9e7a662f // indirect @@ -264,13 +249,13 @@ require ( github.com/yusufpapurcu/wmi v1.2.3 // indirect go.opentelemetry.io/otel/sdk v1.19.0 // indirect go.opentelemetry.io/otel/trace v1.19.0 // indirect - golang.org/x/crypto v0.22.0 // indirect + golang.org/x/crypto v0.23.0 // indirect golang.org/x/mod v0.16.0 // indirect - golang.org/x/net v0.24.0 // indirect - golang.org/x/term v0.19.0 // indirect - golang.org/x/text v0.14.0 // indirect + golang.org/x/net v0.25.0 // indirect + golang.org/x/term v0.20.0 // indirect + golang.org/x/text v0.15.0 // indirect golang.org/x/tools v0.19.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 // indirect gopkg.in/fsnotify.v1 v1.4.7 // indirect gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect howett.net/plist v1.0.0 // indirect diff --git a/go.sum b/go.sum index 50585d21..4bfcb14c 100644 --- a/go.sum +++ b/go.sum @@ -2,10 +2,6 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT cloud.google.com/go v0.31.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.37.0/go.mod h1:TS1dMSSfndXH133OKGwekG838Om/cQT0BUHV3HcBgoo= -cloud.google.com/go/compute v1.23.0 h1:tP41Zoavr8ptEqaW6j+LQOnyBBhO7OkOMAGrgLopTwY= -cloud.google.com/go/compute v1.23.0/go.mod h1:4tCnrn48xsqlwSAiLf1HXMQk8CONslYbdiEZc9FEIbM= -cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= -cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA= dmitri.shuralyov.com/app/changes v0.0.0-20180602232624-0a106ad413e3/go.mod h1:Yl+fi1br7+Rr3LqpNJf1/uxUdtRUV+Tnj0o93V2B9MU= dmitri.shuralyov.com/html/belt v0.0.0-20180602232347-f7d459c86be0/go.mod h1:JLBrvjyP0v+ecvNYvCpyZgu5/xkfAUhi6wJj28eUfSU= dmitri.shuralyov.com/service/change v0.0.0-20181023043359-a85b471d5412/go.mod h1:a1inKt/atXimZ4Mv927x+r7UpyzRUf4emIoiiSC2TN4= @@ -42,7 +38,6 @@ github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c= 
-github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk= @@ -61,9 +56,6 @@ github.com/c-robinson/iplib v1.0.8/go.mod h1:i3LuuFL1hRT5gFpBRnEydzw8R6yhGkF4szN github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMrBo8f1j86j5WHzznCCQxV/b8g= -github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw= -github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/charmbracelet/glamour v0.7.0 h1:2BtKGZ4iVJCDfMF229EzbeR1QRKLWztO9dMtjmqZSng= @@ -73,13 +65,6 @@ github.com/chasefleming/elem-go v0.25.0/go.mod h1:hz73qILBIKnTgOujnSMtEj20/epI+f github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= -github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= -github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 h1:/inchEIKaYC1Akx+H+gqO04wryn5h75LSazbRlnya1k= -github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/containerd/cgroups v0.0.0-20201119153540-4cbc285b3327/go.mod h1:ZJeTFisyysqgcCdecO57Dj79RfL0LNeGiFUqLYQRYLE= github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM= github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw= @@ -131,11 +116,7 @@ github.com/elliotchance/orderedmap/v2 v2.2.0/go.mod h1:85lZyVbpGaGvHvnKa7Qhx7znc github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= -github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= -github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0= github.com/envoyproxy/protoc-gen-validate 
v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/envoyproxy/protoc-gen-validate v1.0.2 h1:QkIBuU5k+x7/QXPvPPnWXWlCdaBFApVqftFV6k087DA= -github.com/envoyproxy/protoc-gen-validate v1.0.2/go.mod h1:GpiZQP3dDbg4JouG/NNS7QWXpgx6x8QiMKdmN72jogE= github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc= github.com/flynn/noise v1.0.0 h1:DlTHqmzmvcEiKj+4RYo/imoswx/4r6iBlCMfVtrMXpQ= github.com/flynn/noise v1.0.0/go.mod h1:xbMo+0i6+IGbYdJhF31t2eR1BIU0CYc12+BNAKwUTag= @@ -217,7 +198,6 @@ github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+Licev github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= @@ -228,8 +208,8 @@ github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.2 h1:aeE13tS0IiQgFjYdoL8qN3K1N2bXXtI6Vi51/y7BpMw= github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= @@ -256,20 +236,14 @@ github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OI github.com/google/pprof v0.0.0-20230821062121-407c9e7a662f h1:pDhu5sgp8yJlEF/g6osliIIpF9K4F5jvkULXa4daRDQ= github.com/google/pprof v0.0.0-20230821062121-407c9e7a662f/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= -github.com/google/s2a-go v0.1.4 h1:1kZ/sQM3srePvKs3tXAvQzo66XfcReoqFpIpIccE7Oc= -github.com/google/s2a-go v0.1.4/go.mod h1:Ej+mSEMGRnqRzjc7VtF+jdBwYG5fuJfiZ8ELkjEwM0A= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= -github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/googleapis/enterprise-certificate-proxy v0.2.3 h1:yk9/cqRKtT9wXZSsRH9aurXEpJX+U6FLtpYTdC3R06k= 
-github.com/googleapis/enterprise-certificate-proxy v0.2.3/go.mod h1:AwSRAtLfXpU5Nm3pW+v7rGDHp09LsPtGY9MduiEsR9k= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go v2.0.0+incompatible/go.mod h1:SFVmujtThgffbyetf+mdk2eWhX2bMyUtNHzFKcPA9HY= github.com/googleapis/gax-go/v2 v2.0.3/go.mod h1:LLvjysVCY1JZeum8Z6l8qUty8fiNwE08qbEPm1M08qg= -github.com/googleapis/gax-go/v2 v2.11.0 h1:9V9PWXEsWnPpQhu/PeQIkS4eGzMlTLGgt80cUUI8Ki4= -github.com/googleapis/gax-go/v2 v2.11.0/go.mod h1:DxmR61SGKkGLa2xigwuZIQpkCI2S5iydzRfb3peWZJI= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c h1:7lF+Vz0LqiRidnzC1Oq86fpX1q/iEv2KJdrCtttYjT4= github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= @@ -279,8 +253,6 @@ github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWm github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= github.com/grpc-ecosystem/grpc-gateway v1.5.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw= -github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= -github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= @@ -587,7 +559,6 @@ github.com/raulk/go-watchdog v1.3.0/go.mod h1:fIvOnLbF0b0ZwkB9YU4mOW9Did//4vPZtD github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= -github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= @@ -738,7 +709,6 @@ go.opentelemetry.io/otel/sdk/metric v1.19.0 h1:EJoTO5qysMsYCa+w4UghwFV/ptQgqSL/8 go.opentelemetry.io/otel/sdk/metric v1.19.0/go.mod h1:XjG0jQyFJrv2PbMvwND7LwCEhsJzCzV5210euduKcKY= go.opentelemetry.io/otel/trace v1.19.0 h1:DFVQmlVbfVeOuBRrwdtaehRrWiL1JoVs9CPIQ1Dzxpg= go.opentelemetry.io/otel/trace v1.19.0/go.mod h1:mfaSyvGyEJEI0nyV2I4qhNQnbBOUUmYZpYojqMnX2vo= -go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= @@ -771,10 +741,9 @@ golang.org/x/crypto v0.0.0-20200602180216-279210d13fed/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto 
v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.0.0-20220314234659-1baeb1ce4c0b/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= -golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30= -golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= +golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ= golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8= @@ -802,11 +771,9 @@ golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190313220215-9f648a60d977/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= @@ -814,18 +781,14 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20210423184538-5f58ad60dda6/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= -golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= -golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= -golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod 
h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.11.0 h1:vPL4xzxBM4niKCW6g9whtaWVXTJf1U5e4aZxxFx/gbU= -golang.org/x/oauth2 v0.11.0/go.mod h1:LdF7O/8bLR/qWK9DrpXmbHLTouvRHK0SgJl0GmDBchk= golang.org/x/perf v0.0.0-20180704124530-6e6d33e29852/go.mod h1:JLpeXjPJfIyPr5TlbXLkXWLhP8nz10XfvxElABhCtcw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -877,24 +840,22 @@ golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= -golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= -golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q= -golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk= +golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -935,28 +896,19 @@ 
gonum.org/v1/gonum v0.13.0/go.mod h1:/WPYRckkfWrhWefxyYTfrTtQR0KH4iyHNuzxqXAKyAU google.golang.org/api v0.0.0-20180910000450-7ca32eb868bf/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= google.golang.org/api v0.0.0-20181030000543-1d582fd0359e/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= google.golang.org/api v0.1.0/go.mod h1:UGEZY7KEX120AnNLIHFMKIo4obdJhkp2tPbaPlQx13Y= -google.golang.org/api v0.126.0 h1:q4GJq+cAdMAC7XP7njvQ4tvohGLiSlytuL4BQxbIZ+o= -google.golang.org/api v0.126.0/go.mod h1:mBwVAtz+87bEN6CbA1GtZPDOqY2R5ONPqJeIlvyo4Aw= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.3.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= -google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20180831171423-11092d34479b/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20181029155118-b69ba1387ce2/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20181202183823-bd91e49a0898/go.mod h1:7Ep/1NZk928CDR8SjdVbjWNpdIf6nzjE3BTgJDr2Atg= google.golang.org/genproto v0.0.0-20190306203927-b5d61aea6440/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d h1:VBu5YqKPv6XiJ199exd8Br+Aetz+o08F+PLMnwJQHAY= -google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d/go.mod h1:yZTlhN0tQnXo3h00fuXNCxJdLdIdnVFVBaRJ5LWBbw4= -google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d h1:DoPTO70H+bcDXcd39vOqb2viZxgqeBeSGtZ55yZU4/Q= -google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d/go.mod h1:KjSP20unUpOx5kyQUFa7k4OJg0qeJ7DEZflGDu2p6Bk= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d h1:uvYuEyMHKNt+lT4K3bN6fGswmK8qSvcreM3BwjDh+y4= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 h1:Zy9XzmMEflZ/MAaA7vNcoebnRAld7FsPW1EeBB7V0m8= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157/go.mod h1:EfXuqaE1J41VCDicxHzUDm+8rk+7ZdXzHV0IhO/I6s0= google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.16.0/go.mod h1:0JHn/cJsOMiMfNA9+DeHDlAU7KAAB5GDlYFpa9MZMio= google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= @@ -964,12 +916,9 @@ google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZi google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod 
h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.45.0/go.mod h1:lN7owxKUQEqMfSyQikvvk5tf/6zMPsrK+ONuO11+0rQ= -google.golang.org/grpc v1.59.0 h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk= -google.golang.org/grpc v1.59.0/go.mod h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98= +google.golang.org/grpc v1.64.0 h1:KH3VH9y/MgNQg1dE7b3XfVK0GsPSIzJwdF617gUSbvY= +google.golang.org/grpc v1.64.0/go.mod h1:oxjF8E3FBnjp+/gVFYdWacaLDx9na1aqy9oovLpxQYg= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -981,8 +930,8 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= +google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= @@ -997,7 +946,6 @@ gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWD gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= From 1ffee9989f0d9c4dfd40a62c19d21d7c4549f1fc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 4 Jun 2024 15:23:00 +0200 Subject: [PATCH 77/80] README: update sponsors list (#2476) Signed-off-by: Ettore Di Giacinto --- README.md | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index ba7617bc..d6150c57 100644 --- a/README.md +++ b/README.md @@ -163,17 +163,16 @@ If you utilize this repository, data in a downstream project, please consider ci Support the project by becoming [a backer or sponsor](https://github.com/sponsors/mudler). 
Your logo will show up here with a link to your website. -A huge thank you to our generous sponsors who support this project: +A huge thank you to our generous sponsors who support this project covering CI expenses, and our [Sponsor list](https://github.com/sponsors/mudler): -| ![Spectro Cloud logo_600x600px_transparent bg](https://github.com/go-skynet/LocalAI/assets/2420543/68a6f3cb-8a65-4a4d-99b5-6417a8905512) | -|:-----------------------------------------------:| -| [Spectro Cloud](https://www.spectrocloud.com/) | -| Spectro Cloud kindly supports LocalAI by providing GPU and computing resources to run tests on lamdalabs! | - -And a huge shout-out to individuals sponsoring the project by donating hardware or backing the project. - -- [Sponsor list](https://github.com/sponsors/mudler) -- JDAM00 (donating HW for the CI) +

+  <!-- sponsor logo links: one <a href="..."><img ...></a> entry per sponsor (the tag markup is not recoverable here) -->
+</p>

## 🌟 Star history From bdd6769b2dcbb8f43cd9f51a53f7f8d05ffc83f3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 4 Jun 2024 15:23:29 +0200 Subject: [PATCH 78/80] feat(default): use number of physical cores as default (#2483) Signed-off-by: Ettore Di Giacinto --- core/cli/run.go | 2 +- core/config/application_config.go | 5 ++++- pkg/xsysinfo/cpu.go | 7 +++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/core/cli/run.go b/core/cli/run.go index 6c41f63b..009f5315 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -37,7 +37,7 @@ type RunCMD struct { PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"` F16 bool `name:"f16" env:"LOCALAI_F16,F16" help:"Enable GPU acceleration" group:"performance"` - Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" default:"4" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"` + Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"` ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" default:"512" help:"Default context size for models" group:"performance"` Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` diff --git a/core/config/application_config.go b/core/config/application_config.go index 398418ad..a71b6223 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -7,6 +7,7 @@ import ( "time" "github.com/go-skynet/LocalAI/pkg/gallery" + "github.com/go-skynet/LocalAI/pkg/xsysinfo" "github.com/rs/zerolog/log" ) @@ -59,7 +60,6 @@ func NewApplicationConfig(o ...AppOption) *ApplicationConfig { opt := &ApplicationConfig{ Context: context.Background(), UploadLimitMB: 15, - Threads: 1, ContextSize: 512, Debug: true, } @@ -213,6 +213,9 @@ func WithUploadLimitMB(limit int) AppOption { func WithThreads(threads int) AppOption { return func(o *ApplicationConfig) { + if threads == 0 { // 0 is not allowed + threads = xsysinfo.CPUPhysicalCores() + } o.Threads = threads } } diff --git a/pkg/xsysinfo/cpu.go b/pkg/xsysinfo/cpu.go index e3066b56..b1ff20fe 100644 --- a/pkg/xsysinfo/cpu.go +++ b/pkg/xsysinfo/cpu.go @@ -36,3 +36,10 @@ func CPUCapabilities() ([]string, error) { func HasCPUCaps(ids ...cpuid.FeatureID) bool { return cpuid.CPU.Supports(ids...) 
} + +func CPUPhysicalCores() int { + if cpuid.CPU.PhysicalCores == 0 { + return 1 + } + return cpuid.CPU.PhysicalCores +} From 2fc6fe806b903ac0a70218b21b5c84443a1b0866 Mon Sep 17 00:00:00 2001 From: Dave Date: Tue, 4 Jun 2024 10:32:47 -0400 Subject: [PATCH 79/80] fix: `pkg/downloader` should respect basePath for `file://` urls (#2481) * pass basePath down to pkg/downloader Signed-off-by: Dave Lee * enforce Signed-off-by: Dave Lee --------- Signed-off-by: Dave Lee --- core/http/app_test.go | 3 ++- core/services/gallery.go | 2 +- embedded/embedded.go | 4 ++-- pkg/downloader/uri.go | 7 ++++++- pkg/downloader/uri_test.go | 6 +++--- pkg/gallery/gallery.go | 10 +++++----- pkg/gallery/models.go | 4 ++-- pkg/gallery/request_test.go | 2 +- pkg/startup/model_preload.go | 2 +- 9 files changed, 23 insertions(+), 17 deletions(-) diff --git a/core/http/app_test.go b/core/http/app_test.go index 5776b99a..6e9de246 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -73,7 +73,8 @@ func getModelStatus(url string) (response map[string]interface{}) { } func getModels(url string) (response []gallery.GalleryModel) { - downloader.GetURI(url, func(url string, i []byte) error { + // TODO: No tests currently seem to exercise file:// urls. Fix? + downloader.GetURI(url, "", func(url string, i []byte) error { // Unmarshal YAML data into a struct return json.Unmarshal(i, &response) }) diff --git a/core/services/gallery.go b/core/services/gallery.go index ed6f6165..e20e733a 100644 --- a/core/services/gallery.go +++ b/core/services/gallery.go @@ -32,7 +32,7 @@ func NewGalleryService(modelPath string) *GalleryService { func prepareModel(modelPath string, req gallery.GalleryModel, cl *config.BackendConfigLoader, downloadStatus func(string, string, string, float64)) error { - config, err := gallery.GetGalleryConfigFromURL(req.URL) + config, err := gallery.GetGalleryConfigFromURL(req.URL, modelPath) if err != nil { return err } diff --git a/embedded/embedded.go b/embedded/embedded.go index 438a1352..1fc59b4d 100644 --- a/embedded/embedded.go +++ b/embedded/embedded.go @@ -36,10 +36,10 @@ func init() { } } -func GetRemoteLibraryShorteners(url string) (map[string]string, error) { +func GetRemoteLibraryShorteners(url string, basePath string) (map[string]string, error) { remoteLibrary := map[string]string{} - err := downloader.GetURI(url, func(_ string, i []byte) error { + err := downloader.GetURI(url, basePath, func(_ string, i []byte) error { return yaml.Unmarshal(i, &remoteLibrary) }) if err != nil { diff --git a/pkg/downloader/uri.go b/pkg/downloader/uri.go index 797a264b..0848a238 100644 --- a/pkg/downloader/uri.go +++ b/pkg/downloader/uri.go @@ -23,7 +23,7 @@ const ( GithubURI2 = "github://" ) -func GetURI(url string, f func(url string, i []byte) error) error { +func GetURI(url string, basePath string, f func(url string, i []byte) error) error { url = ConvertURL(url) if strings.HasPrefix(url, "file://") { @@ -33,6 +33,11 @@ func GetURI(url string, f func(url string, i []byte) error) error { if err != nil { return err } + // Check if the local file is rooted in basePath + err = utils.VerifyPath(resolvedFile, basePath) + if err != nil { + return err + } // Read the response body body, err := os.ReadFile(resolvedFile) if err != nil { diff --git a/pkg/downloader/uri_test.go b/pkg/downloader/uri_test.go index cd17b7ca..3ab04e56 100644 --- a/pkg/downloader/uri_test.go +++ b/pkg/downloader/uri_test.go @@ -10,7 +10,7 @@ var _ = Describe("Gallery API tests", func() { Context("URI", func() { It("parses github with a 
branch", func() { Expect( - GetURI("github:go-skynet/model-gallery/gpt4all-j.yaml", func(url string, i []byte) error { + GetURI("github:go-skynet/model-gallery/gpt4all-j.yaml", "", func(url string, i []byte) error { Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml")) return nil }), @@ -18,7 +18,7 @@ var _ = Describe("Gallery API tests", func() { }) It("parses github without a branch", func() { Expect( - GetURI("github:go-skynet/model-gallery/gpt4all-j.yaml@main", func(url string, i []byte) error { + GetURI("github:go-skynet/model-gallery/gpt4all-j.yaml@main", "", func(url string, i []byte) error { Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml")) return nil }), @@ -26,7 +26,7 @@ var _ = Describe("Gallery API tests", func() { }) It("parses github with urls", func() { Expect( - GetURI("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml", func(url string, i []byte) error { + GetURI("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml", "", func(url string, i []byte) error { Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml")) return nil }), diff --git a/pkg/gallery/gallery.go b/pkg/gallery/gallery.go index 6202529a..0e9daa79 100644 --- a/pkg/gallery/gallery.go +++ b/pkg/gallery/gallery.go @@ -27,7 +27,7 @@ func InstallModelFromGallery(galleries []Gallery, name string, basePath string, if len(model.URL) > 0 { var err error - config, err = GetGalleryConfigFromURL(model.URL) + config, err = GetGalleryConfigFromURL(model.URL, basePath) if err != nil { return err } @@ -142,9 +142,9 @@ func AvailableGalleryModels(galleries []Gallery, basePath string) ([]*GalleryMod return models, nil } -func findGalleryURLFromReferenceURL(url string) (string, error) { +func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) { var refFile string - err := downloader.GetURI(url, func(url string, d []byte) error { + err := downloader.GetURI(url, basePath, func(url string, d []byte) error { refFile = string(d) if len(refFile) == 0 { return fmt.Errorf("invalid reference file at url %s: %s", url, d) @@ -161,13 +161,13 @@ func getGalleryModels(gallery Gallery, basePath string) ([]*GalleryModel, error) if strings.HasSuffix(gallery.URL, ".ref") { var err error - gallery.URL, err = findGalleryURLFromReferenceURL(gallery.URL) + gallery.URL, err = findGalleryURLFromReferenceURL(gallery.URL, basePath) if err != nil { return models, err } } - err := downloader.GetURI(gallery.URL, func(url string, d []byte) error { + err := downloader.GetURI(gallery.URL, basePath, func(url string, d []byte) error { return yaml.Unmarshal(d, &models) }) if err != nil { diff --git a/pkg/gallery/models.go b/pkg/gallery/models.go index e697fcd6..225097c0 100644 --- a/pkg/gallery/models.go +++ b/pkg/gallery/models.go @@ -63,9 +63,9 @@ type PromptTemplate struct { Content string `yaml:"content"` } -func GetGalleryConfigFromURL(url string) (Config, error) { +func GetGalleryConfigFromURL(url string, basePath string) (Config, error) { var config Config - err := downloader.GetURI(url, func(url string, d []byte) error { + err := downloader.GetURI(url, basePath, func(url string, d []byte) error { return yaml.Unmarshal(d, &config) }) if err != nil { diff --git a/pkg/gallery/request_test.go b/pkg/gallery/request_test.go index a9d54e32..af085e81 100644 --- a/pkg/gallery/request_test.go +++ b/pkg/gallery/request_test.go @@ -10,7 +10,7 
@@ var _ = Describe("Gallery API tests", func() { Context("requests", func() { It("parses github with a branch", func() { req := GalleryModel{URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main"} - e, err := GetGalleryConfigFromURL(req.URL) + e, err := GetGalleryConfigFromURL(req.URL, "") Expect(err).ToNot(HaveOccurred()) Expect(e.Name).To(Equal("gpt4all-j")) }) diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go index d267d846..240fc6bd 100644 --- a/pkg/startup/model_preload.go +++ b/pkg/startup/model_preload.go @@ -20,7 +20,7 @@ func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, model // As a best effort, try to resolve the model from the remote library // if it's not resolved we try with the other method below if modelLibraryURL != "" { - lib, err := embedded.GetRemoteLibraryShorteners(modelLibraryURL) + lib, err := embedded.GetRemoteLibraryShorteners(modelLibraryURL, modelPath) if err == nil { if lib[url] != "" { log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url]) From 4e1463fec291612a59a16db60b3fd12d4c49d64b Mon Sep 17 00:00:00 2001 From: Dave Date: Tue, 4 Jun 2024 15:43:46 -0400 Subject: [PATCH 80/80] feat: fiber CSRF (#2482) new config option - enables or disables the fiber csrf middleware Signed-off-by: Dave Lee --- core/cli/run.go | 2 ++ core/config/application_config.go | 7 +++++++ core/http/app.go | 6 ++++++ go.mod | 2 ++ go.sum | 11 +++++++++++ 5 files changed, 28 insertions(+) diff --git a/core/cli/run.go b/core/cli/run.go index 009f5315..17fb79be 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -43,6 +43,7 @@ type RunCMD struct { Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"` CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"` + CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"` UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"` APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. 
When this is set, all the requests must be authenticated with one of these API keys" group:"api"` DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"` @@ -77,6 +78,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { config.WithModelLibraryURL(r.RemoteLibrary), config.WithCors(r.CORS), config.WithCorsAllowOrigins(r.CORSAllowOrigins), + config.WithCsrf(r.CSRF), config.WithThreads(r.Threads), config.WithBackendAssets(ctx.BackendAssets), config.WithBackendAssetsOutput(r.BackendAssetsPath), diff --git a/core/config/application_config.go b/core/config/application_config.go index a71b6223..9f563842 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -26,6 +26,7 @@ type ApplicationConfig struct { DynamicConfigsDir string DynamicConfigsDirPollInterval time.Duration CORS bool + CSRF bool PreloadJSONModels string PreloadModelsFromPath string CORSAllowOrigins string @@ -87,6 +88,12 @@ func WithCors(b bool) AppOption { } } +func WithCsrf(b bool) AppOption { + return func(o *ApplicationConfig) { + o.CSRF = b + } +} + func WithModelLibraryURL(url string) AppOption { return func(o *ApplicationConfig) { o.ModelLibraryURL = url diff --git a/core/http/app.go b/core/http/app.go index de31346b..1ffd6b45 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -20,6 +20,7 @@ import ( "github.com/gofiber/contrib/fiberzerolog" "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/cors" + "github.com/gofiber/fiber/v2/middleware/csrf" "github.com/gofiber/fiber/v2/middleware/favicon" "github.com/gofiber/fiber/v2/middleware/filesystem" "github.com/gofiber/fiber/v2/middleware/recover" @@ -167,6 +168,11 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi app.Use(c) } + if appConfig.CSRF { + log.Debug().Msg("Enabling CSRF middleware. 
Tokens are now required for state-modifying requests") + app.Use(csrf.New()) + } + // Load config jsons utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles) utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) diff --git a/go.mod b/go.mod index 690be3f1..393608d5 100644 --- a/go.mod +++ b/go.mod @@ -125,6 +125,7 @@ require ( github.com/opentracing/opentracing-go v1.2.0 // indirect github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect github.com/peterbourgon/diskv v2.0.1+incompatible // indirect + github.com/philhofer/fwd v1.1.2 // indirect github.com/polydawn/refmt v0.89.0 // indirect github.com/quic-go/qpack v0.4.0 // indirect github.com/quic-go/qtls-go1-20 v0.3.3 // indirect @@ -133,6 +134,7 @@ require ( github.com/raulk/go-watchdog v1.3.0 // indirect github.com/songgao/packets v0.0.0-20160404182456-549a10cd4091 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect + github.com/tinylib/msgp v1.1.8 // indirect github.com/vishvananda/netlink v1.1.0 // indirect github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74 // indirect github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1 // indirect diff --git a/go.sum b/go.sum index 4bfcb14c..792b9175 100644 --- a/go.sum +++ b/go.sum @@ -520,6 +520,8 @@ github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+v github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 h1:Ii+DKncOVM8Cu1Hc+ETb5K+23HdAMvESYE3ZJ5b5cMI= github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5/go.mod h1:iIss55rKnNBTvrwdmkUpLnDpZoAHvWaiq5+iMmen4AE= +github.com/philhofer/fwd v1.1.2 h1:bnDivRJ1EWPjUIRXV5KfORO897HTbpFAQddBdE8t7Gw= +github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0= github.com/pierrec/lz4/v4 v4.1.2 h1:qvY3YFXRQE/XB8MlLzJH7mSzBs74eA2gg52YTk6jUPM= github.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -645,6 +647,8 @@ github.com/swaggo/files/v2 v2.0.0/go.mod h1:24kk2Y9NYEJ5lHuCra6iVwkMjIekMCaFq/0J github.com/swaggo/swag v1.16.3 h1:PnCYjPCah8FK4I26l2F/KQ4yz3sILcVUN3cTlBFA9Pg= github.com/swaggo/swag v1.16.3/go.mod h1:DImHIuOFXKpMFAQjcC7FG4m3Dg4+QuUgUzJmKjI/gRk= github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA= +github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0= +github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw= github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI= github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU= github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI= @@ -759,6 +763,7 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.16.0 h1:QX4fJ0Rr5cPQCF7O9lh9Se4pmwfwskqZfq5moyldzic= golang.org/x/mod v0.16.0/go.mod 
h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -783,6 +788,7 @@ golang.org/x/net v0.0.0-20210423184538-5f58ad60dda6/go.mod h1:OJAsFXCWl8Ukc7SiCT golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= +golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE= golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -799,6 +805,7 @@ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180810173357-98c5dad5d1a0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -834,6 +841,7 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -845,6 +853,7 @@ golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= +golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= @@ -854,6 +863,7 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.15.0 
h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -879,6 +889,7 @@ golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4f golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ= golang.org/x/tools v0.19.0 h1:tfGCXNR1OsFG+sVdLAitlpjAvD/I6dHDKnYrpEZUHkw= golang.org/x/tools v0.19.0/go.mod h1:qoJWxmGSIBmAeriMx19ogtrEPrGtDbPK634QFIcLAhc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
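
Note on PATCH 79/80: the diff guards `file://` URLs with a call to `utils.VerifyPath(resolvedFile, basePath)`, but the body of that helper is not part of these patches. Below is a minimal, self-contained sketch of the kind of base-path containment check such a helper typically performs. The `verifyPath` function is hypothetical and written for illustration only; it is not LocalAI's actual `utils.VerifyPath`.

package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

// verifyPath returns an error unless path, once cleaned and made
// absolute, still resolves inside basePath. This blocks traversal
// attempts such as file://../../etc/passwd.
func verifyPath(path, basePath string) error {
	if basePath == "" {
		// Assumption for this sketch only: an empty base path disables
		// the check (the tests above pass "" for remote URLs). The real
		// utils.VerifyPath behaviour for this case is not shown in the diff.
		return nil
	}
	absBase, err := filepath.Abs(basePath)
	if err != nil {
		return err
	}
	absPath, err := filepath.Abs(path)
	if err != nil {
		return err
	}
	rel, err := filepath.Rel(absBase, absPath)
	if err != nil {
		return err
	}
	if rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return fmt.Errorf("path %q escapes base path %q", path, basePath)
	}
	return nil
}

func main() {
	fmt.Println(verifyPath("/models/gpt4all-j.yaml", "/models")) // <nil>
	fmt.Println(verifyPath("/models/../etc/passwd", "/models"))  // error
}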
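
Note on PATCH 80/80: `csrf.New()` is mounted with its defaults. As a rough usage sketch, the snippet below spells out a default-equivalent Fiber v2 configuration so the behaviour is visible. The field values mirror the middleware's documented defaults (token read from the `X-Csrf-Token` header, backed by a `csrf_` cookie, one-hour expiration), and the route is illustrative only.

package main

import (
	"log"
	"time"

	"github.com/gofiber/fiber/v2"
	"github.com/gofiber/fiber/v2/middleware/csrf"
)

func main() {
	app := fiber.New()

	// Roughly equivalent to the bare csrf.New() used in core/http/app.go:
	// safe requests (GET, HEAD, ...) receive a token cookie, and
	// state-modifying requests must echo the token back in the
	// X-Csrf-Token header or they are rejected.
	app.Use(csrf.New(csrf.Config{
		KeyLookup:  "header:X-Csrf-Token",
		CookieName: "csrf_",
		Expiration: 1 * time.Hour,
	}))

	app.Post("/echo", func(c *fiber.Ctx) error {
		return c.SendString("ok")
	})

	log.Fatal(app.Listen(":8080"))
}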