From f8cea16c03a7b175e205f61649d2e80e3ea04a13 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 24 May 2024 23:52:13 +0200 Subject: [PATCH 01/80] :arrow_up: Update ggerganov/llama.cpp (#2399) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b4ced7e9..ee58dcbe 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=74f33adf5f8b20b08fc5a6aa17ce081abe86ef2f +CPPLLAMA_VERSION?=d041d2ceaaf50e058622d92921b3e680ffa4e9e7 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 29615576fbb07465265a9f2297d624979868eed7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Serta=C3=A7=20=C3=96zercan?= <852750+sozercan@users.noreply.github.com> Date: Sat, 25 May 2024 00:33:50 -0700 Subject: [PATCH 02/80] ci: fix sd release (#2400) Signed-off-by: Sertac Ozercan --- .github/workflows/release.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 330b2559..7c7f7742 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -100,6 +100,12 @@ jobs: with: name: stablediffusion path: release/ + - name: Release + uses: softprops/action-gh-release@v2 + if: startsWith(github.ref, 'refs/tags/') + with: + files: | + release/* build-macOS-arm64: runs-on: macos-14 From e1d6b706f4b8e4499f13c6dcfbdf9ccfbbe20718 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 25 May 2024 10:08:23 +0200 Subject: [PATCH 03/80] Update quickstart.md (#2404) Signed-off-by: Ettore Di Giacinto --- docs/content/docs/getting-started/quickstart.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md index 0c964eb0..1bba42fb 100644 --- a/docs/content/docs/getting-started/quickstart.md +++ b/docs/content/docs/getting-started/quickstart.md @@ -123,9 +123,7 @@ You can check out the releases in https://github.com/mudler/LocalAI/releases. 
| OS | Link | | --- | --- | -| Linux (CUDA 11) | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-cuda11-Linux-x86_64) | -| Linux (CUDA 12) | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-cuda12-Linux-x86_64) | -| Linux (No GPU) | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-Linux-x86_64) | +| Linux | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-Linux-x86_64) | | MacOS | [Download](https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-Darwin-arm64) | From 663488b6bd3f2086dafbcbef9843019a36d1d7b1 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 25 May 2024 10:08:35 +0200 Subject: [PATCH 04/80] :arrow_up: Update docs version mudler/LocalAI (#2398) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- docs/data/version.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/data/version.json b/docs/data/version.json index 6991ef2f..d4af2be3 100644 --- a/docs/data/version.json +++ b/docs/data/version.json @@ -1,3 +1,3 @@ { - "version": "v2.15.0" + "version": "v2.16.0" } From 003b43f6fc4844cbf495d22438c85e742d130fdc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 25 May 2024 10:18:20 +0200 Subject: [PATCH 05/80] Update quickstart.md Signed-off-by: Ettore Di Giacinto --- docs/content/docs/getting-started/quickstart.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md index 1bba42fb..f92303e0 100644 --- a/docs/content/docs/getting-started/quickstart.md +++ b/docs/content/docs/getting-started/quickstart.md @@ -114,12 +114,17 @@ docker run -p 8080:8080 --name local-ai -ti -v localai-models:/build/models loca {{% /alert %}} -## From binary +## Running LocalAI from Binaries -LocalAI is available as a standalone binary as well. Binaries are compiled for Linux and MacOS and automatically uploaded in the Github releases. Windows is known to work with WSL. +LocalAI binaries are available for both Linux and MacOS platforms and can be executed directly from your command line. These binaries are continuously updated and hosted on [our GitHub Releases page](https://github.com/mudler/LocalAI/releases). This method also supports Windows users via the Windows Subsystem for Linux (WSL). -You can check out the releases in https://github.com/mudler/LocalAI/releases. 
+Use the following one-liner command in your terminal to download and run LocalAI on Linux or MacOS: +```bash +curl -Lo local-ai "https://github.com/mudler/LocalAI/releases/download/{{< version >}}/local-ai-$(uname -s)-$(uname -m)" && chmod +x local-ai && ./local-ai +``` + +Otherwise, here are the links to the binaries: | OS | Link | | --- | --- | From 785c54e7b0c7824762ac4f025f2da0cfdd1eacf1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 25 May 2024 16:11:01 +0200 Subject: [PATCH 06/80] models(gallery): add Mirai Nova (#2405) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index a38a78e1..b43aced1 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -57,6 +57,25 @@ - filename: LocalAI-Llama3-8b-Function-Call-v0.2-q4_k_m.bin sha256: 7e46405ce043cbc8d30f83f26a5655dc8edf5e947b748d7ba2745bd0af057a41 uri: huggingface://mudler/LocalAI-Llama3-8b-Function-Call-v0.2-GGUF/LocalAI-Llama3-8b-Function-Call-v0.2-q4_k_m.bin +- !!merge <<: *mudler + icon: "https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/SKuXcvmZ_6oD4NCMkvyGo.png" + name: "mirai-nova-llama3-LocalAI-8b-v0.1" + urls: + - https://huggingface.co/mudler/Mirai-Nova-Llama3-LocalAI-8B-v0.1-GGUF + - https://huggingface.co/mudler/Mirai-Nova-Llama3-LocalAI-8B-v0.1 + description: | + Mirai Nova: "Mirai" means future in Japanese, and "Nova" references a star showing a sudden large increase in brightness. + + A set of models oriented in function calling, but generalist and with enhanced reasoning capability. This is fine tuned with Llama3. + + Mirai Nova works particularly well with LocalAI, leveraging the function call with grammars feature out of the box. + overrides: + parameters: + model: Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin + files: + - filename: Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin + sha256: 579cbb229f9c11d0330759ff4733102d2491615a4c61289e26c09d1b3a583fec + uri: huggingface://mudler/Mirai-Nova-Llama3-LocalAI-8B-v0.1-GGUF/Mirai-Nova-Llama3-LocalAI-8B-v0.1-q4_k_m.bin - &parler-tts ### START parler-tts url: "github:mudler/LocalAI/gallery/parler-tts.yaml@master" From bb3ec56de3231354ec6a3e9b368f7fe4017385a2 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 25 May 2024 16:11:59 +0200 Subject: [PATCH 07/80] docs: add distributed inferencing docs Signed-off-by: Ettore Di Giacinto --- README.md | 5 +- docs/content/docs/advanced/advanced-usage.md | 2 + .../docs/features/distributed_inferencing.md | 101 ++++++++++++++++++ docs/content/docs/features/reranker.md | 2 +- docs/content/docs/overview.md | 3 +- 5 files changed, 109 insertions(+), 4 deletions(-) create mode 100644 docs/content/docs/features/distributed_inferencing.md diff --git a/README.md b/README.md index 377df0d2..a4479258 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ docker run -ti --name local-ai -p 8080:8080 localai/localai:latest-aio-cpu [Roadmap](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap) -- 🔥🔥 Decentralized llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) +- 🔥🔥 Decentralized llama.cpp: https://github.com/mudler/LocalAI/pull/2343 (peer2peer llama.cpp!) 
👉 Docs https://localai.io/features/distribute/ - 🔥🔥 Openvoice: https://github.com/mudler/LocalAI/pull/2334 - 🆕 Function calls without grammars and mixed mode: https://github.com/mudler/LocalAI/pull/2328 - 🔥🔥 Distributed inferencing: https://github.com/mudler/LocalAI/pull/2324 @@ -94,7 +94,8 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/) - 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/) - 🥽 [Vision API](https://localai.io/features/gpt-vision/) -- 🆕 [Reranker API](https://localai.io/features/reranker/) +- 📈 [Reranker API](https://localai.io/features/reranker/) +- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/) ## 💻 Usage diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md index 085606e5..40d7d0fc 100644 --- a/docs/content/docs/advanced/advanced-usage.md +++ b/docs/content/docs/advanced/advanced-usage.md @@ -370,6 +370,8 @@ there are additional environment variables available that modify the behavior of | `GO_TAGS` | | Go tags. Available: `stablediffusion` | | `HUGGINGFACEHUB_API_TOKEN` | | Special token for interacting with HuggingFace Inference API, required only when using the `langchain-huggingface` backend | | `EXTRA_BACKENDS` | | A space separated list of backends to prepare. For example `EXTRA_BACKENDS="backend/python/diffusers backend/python/transformers"` prepares the conda environment on start | +| `DISABLE_AUTODETECT` | `false` | Disable autodetect of CPU flagset on start | +| `LLAMACPP_GRPC_SERVERS` | | A list of llama.cpp workers to distribute the workload. For example `LLAMACPP_GRPC_SERVERS="address1:port,address2:port"` | Here is how to configure these variables: diff --git a/docs/content/docs/features/distributed_inferencing.md b/docs/content/docs/features/distributed_inferencing.md new file mode 100644 index 00000000..746616f9 --- /dev/null +++ b/docs/content/docs/features/distributed_inferencing.md @@ -0,0 +1,101 @@ ++++ +disableToc = false +title = "✍️ Distributed inferencing" +weight = 15 +url = "/features/distribute/" ++++ + +{{% alert note %}} +This feature is available only with llama-cpp compatible models. + +This feature has landed with https://github.com/mudler/LocalAI/pull/2324 and is based on the upstream work in https://github.com/ggerganov/llama.cpp/pull/6829. +{{% /alert %}} + +This feature allows LocalAI to manage the requests while the workload is distributed among workers. + +## Usage + +### Start workers + +To start workers to offload the computation you can run: + +``` +local-ai llamacpp-worker +``` + +However, you can also follow the llama.cpp README and building the rpc-server (https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md), which is still compatible with LocalAI. + +### Start LocalAI + +When starting the LocalAI server, which is going to accept the API requests, you can set a list of workers IP/address by specifying the addresses with the `LLAMACPP_GRPC_SERVERS` environment variable, for example: + +```bash +LLAMACPP_GRPC_SERVERS="address1:port,address2:port" local-ai run +``` + +At this point the workload hitting in the LocalAI server should be distributed across the nodes! 
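
To make this concrete, here is a minimal sketch of a two-worker setup. The worker addresses and ports are hypothetical placeholders; the sketch assumes each host is already running a worker as described above, and uses only the `LLAMACPP_GRPC_SERVERS` variable and `local-ai run` command shown earlier:

```bash
# Workers (started as shown above) are assumed reachable at these
# hypothetical addresses:
#   192.168.1.10:50052
#   192.168.1.11:50052

# On the API host, list both workers so the workload is split across them:
LLAMACPP_GRPC_SERVERS="192.168.1.10:50052,192.168.1.11:50052" local-ai run
```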
+ +## Peer to peer + +![output](https://github.com/mudler/LocalAI/assets/2420543/8ca277cf-c208-4562-8929-808b2324b584) + +The workers can also be connected to each other, creating a peer to peer network, where the workload is distributed among the workers, in a private, decentralized network. + +A shared token between the server and the workers is needed to let the communication happen via the p2p network. This feature supports both local network (with mdns discovery) and dht for communicating also behind different networks. + +The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token). + +A network is established between the server and the workers with dht and mdns discovery protocols, the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on. + +When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally. Then llama.cpp is configured to use the services. If you are interested in how it works behind the scenes, see the PR: https://github.com/mudler/LocalAI/pull/2343. + + +### Usage + +1. Start the server with `--p2p`: + +```bash +./local-ai run --p2p +# 1:02AM INF loading environment variables from file envFile=.env +# 1:02AM INF Setting logging to info +# 1:02AM INF P2P mode enabled +# 1:02AM INF No token provided, generating one +# 1:02AM INF Generated Token: +# XXXXXXXXXXX +# 1:02AM INF Press a button to proceed +``` + +A token is displayed, copy it and press enter. + +You can re-use the same token later restarting the server with `--p2ptoken` (or `P2P_TOKEN`). + +2. Start the workers. Now you can copy the local-ai binary in other hosts, and run as many workers with that token: + +```bash +TOKEN=XXX ./local-ai p2p-llama-cpp-rpc +# 1:06AM INF loading environment variables from file envFile=.env +# 1:06AM INF Setting logging to info +# {"level":"INFO","time":"2024-05-19T01:06:01.794+0200","caller":"config/config.go:288","message":"connmanager disabled\n"} +# {"level":"INFO","time":"2024-05-19T01:06:01.794+0200","caller":"config/config.go:295","message":" go-libp2p resource manager protection enabled"} +# {"level":"INFO","time":"2024-05-19T01:06:01.794+0200","caller":"config/config.go:409","message":"max connections: 100\n"} +# 1:06AM INF Starting llama-cpp-rpc-server on '127.0.0.1:34371' +# {"level":"INFO","time":"2024-05-19T01:06:01.794+0200","caller":"node/node.go:118","message":" Starting EdgeVPN network"} +# create_backend: using CPU backend +# Starting RPC server on 127.0.0.1:34371, backend memory: 31913 MB +# 2024/05/19 01:06:01 failed to sufficiently increase receive buffer size (was: 208 kiB, wanted: 2048 kiB, got: 416 kiB). # See https://github.com/quic-go/quic-go/wiki/UDP-Buffer-Sizes for details. 
+# {"level":"INFO","time":"2024-05-19T01:06:01.805+0200","caller":"node/node.go:172","message":" Node ID: 12D3KooWJ7WQAbCWKfJgjw2oMMGGss9diw3Sov5hVWi8t4DMgx92"} +# {"level":"INFO","time":"2024-05-19T01:06:01.806+0200","caller":"node/node.go:173","message":" Node Addresses: [/ip4/127.0.0.1/tcp/44931 /ip4/127.0.0.1/udp/33251/quic-v1/webtransport/certhash/uEiAWAhZ-W9yx2ZHnKQm3BE_ft5jjoc468z5-Rgr9XdfjeQ/certhash/uEiB8Uwn0M2TQBELaV2m4lqypIAY2S-2ZMf7lt_N5LS6ojw /ip4/127.0.0.1/udp/35660/quic-v1 /ip4/192.168.68.110/tcp/44931 /ip4/192.168.68.110/udp/33251/quic-v1/webtransport/certhash/uEiAWAhZ-W9yx2ZHnKQm3BE_ft5jjoc468z5-Rgr9XdfjeQ/certhash/uEiB8Uwn0M2TQBELaV2m4lqypIAY2S-2ZMf7lt_N5LS6ojw /ip4/192.168.68.110/udp/35660/quic-v1 /ip6/::1/tcp/41289 /ip6/::1/udp/33160/quic-v1/webtransport/certhash/uEiAWAhZ-W9yx2ZHnKQm3BE_ft5jjoc468z5-Rgr9XdfjeQ/certhash/uEiB8Uwn0M2TQBELaV2m4lqypIAY2S-2ZMf7lt_N5LS6ojw /ip6/::1/udp/35701/quic-v1]"} +# {"level":"INFO","time":"2024-05-19T01:06:01.806+0200","caller":"discovery/dht.go:104","message":" Bootstrapping DHT"} +``` + +(Note you can also supply the token via args) + +At this point, you should see in the server logs messages stating that new workers are found + +3. Now you can start doing inference as usual on the server (the node used on step 1) + + +## Notes + +- Only single model is supported for now +- Make sure that the server sees new workers after usage starts - currently, if you start the inference you can't add other workers later on. \ No newline at end of file diff --git a/docs/content/docs/features/reranker.md b/docs/content/docs/features/reranker.md index 92c406df..4bc01a7f 100644 --- a/docs/content/docs/features/reranker.md +++ b/docs/content/docs/features/reranker.md @@ -1,7 +1,7 @@ +++ disableToc = false -title = " Reranker" +title = "📈 Reranker" weight = 11 url = "/features/reranker/" +++ diff --git a/docs/content/docs/overview.md b/docs/content/docs/overview.md index 15086f6f..beadfbd3 100644 --- a/docs/content/docs/overview.md +++ b/docs/content/docs/overview.md @@ -101,7 +101,8 @@ Note that this started just as a fun weekend project by [mudler](https://github. 
 - 🖼️ [Download Models directly from Huggingface ](https://localai.io/models/)
 - 🥽 [Vision API](https://localai.io/features/gpt-vision/)
 - 💾 [Stores](https://localai.io/stores)
-- 🆕 [Reranker](https://localai.io/features/reranker/)
+- 📈 [Reranker](https://localai.io/features/reranker/)
+- 🆕🖧 [P2P Inferencing](https://localai.io/features/distribute/)
 
 ## Contribute and help
 

From e25fc656c97e4f63cecfc81c35cfb2c9891ef62f Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 25 May 2024 16:13:04 +0200
Subject: [PATCH 08/80] Update README.md

Signed-off-by: Ettore Di Giacinto
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index a4479258..dc0ba70e 100644
--- a/README.md
+++ b/README.md
@@ -89,7 +89,7 @@ If you want to help and contribute, issues up for grabs: https://github.com/mudl
 - 🗣 [Text to Audio](https://localai.io/features/text-to-audio/)
 - 🔈 [Audio to Text](https://localai.io/features/audio-to-text/) (Audio transcription with `whisper.cpp`)
 - 🎨 [Image generation with stable diffusion](https://localai.io/features/image-generation)
-- 🔥 [OpenAI functions](https://localai.io/features/openai-functions/) 🆕
+- 🔥 [OpenAI-alike tools API](https://localai.io/features/openai-functions/)
 - 🧠 [Embeddings generation for vector databases](https://localai.io/features/embeddings/)
 - ✍️ [Constrained grammars](https://localai.io/features/constrained_grammars/)

From 785adc1ed5cb623dc9d1dde07061c4e2ddaf0fad Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 25 May 2024 16:13:44 +0200
Subject: [PATCH 09/80] docs: update title

Signed-off-by: Ettore Di Giacinto
---
 docs/content/docs/features/distributed_inferencing.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/content/docs/features/distributed_inferencing.md b/docs/content/docs/features/distributed_inferencing.md
index 746616f9..8a4cc545 100644
--- a/docs/content/docs/features/distributed_inferencing.md
+++ b/docs/content/docs/features/distributed_inferencing.md
@@ -1,6 +1,6 @@
 +++
 disableToc = false
-title = "✍️ Distributed inferencing"
+title = "🆕🖧 Distributed inferencing"
 weight = 15
 url = "/features/distribute/"
 +++

From fc3502b56f0d69be7e514a32ec22814d95c66915 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 25 May 2024 20:17:04 +0200
Subject: [PATCH 10/80] docs: rewording

Signed-off-by: Ettore Di Giacinto
---
 .../docs/features/distributed_inferencing.md | 56 +++++++++----------
 1 file changed, 27 insertions(+), 29 deletions(-)

diff --git a/docs/content/docs/features/distributed_inferencing.md b/docs/content/docs/features/distributed_inferencing.md
index 8a4cc545..b3b84528 100644
--- a/docs/content/docs/features/distributed_inferencing.md
+++ b/docs/content/docs/features/distributed_inferencing.md
@@ -1,54 +1,53 @@
 +++
 disableToc = false
-title = "🆕🖧 Distributed inferencing"
+title = "🆕🖧 Distributed Inference"
 weight = 15
 url = "/features/distribute/"
 +++
 
 {{% alert note %}}
-This feature is available only with llama-cpp compatible models.
+This feature is available exclusively with llama-cpp compatible models.
 
-This feature has landed with https://github.com/mudler/LocalAI/pull/2324 and is based on the upstream work in https://github.com/ggerganov/llama.cpp/pull/6829.
+This feature was introduced in [LocalAI pull request #2324](https://github.com/mudler/LocalAI/pull/2324) and is based on the upstream work in [llama.cpp pull request #6829](https://github.com/ggerganov/llama.cpp/pull/6829). {{% /alert %}} -This feature allows LocalAI to manage the requests while the workload is distributed among workers. +This functionality enables LocalAI to distribute inference requests across multiple worker nodes, improving efficiency and performance. ## Usage -### Start workers +### Starting Workers -To start workers to offload the computation you can run: +To start workers for distributing the computational load, run: -``` +```bash local-ai llamacpp-worker ``` -However, you can also follow the llama.cpp README and building the rpc-server (https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md), which is still compatible with LocalAI. +Alternatively, you can build the RPC server following the llama.cpp [README](https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md), which is compatible with LocalAI. -### Start LocalAI +### Starting LocalAI -When starting the LocalAI server, which is going to accept the API requests, you can set a list of workers IP/address by specifying the addresses with the `LLAMACPP_GRPC_SERVERS` environment variable, for example: +To start the LocalAI server, which handles API requests, specify the worker addresses using the `LLAMACPP_GRPC_SERVERS` environment variable: ```bash LLAMACPP_GRPC_SERVERS="address1:port,address2:port" local-ai run ``` -At this point the workload hitting in the LocalAI server should be distributed across the nodes! +The workload on the LocalAI server will then be distributed across the specified nodes. -## Peer to peer +## Peer-to-Peer Networking ![output](https://github.com/mudler/LocalAI/assets/2420543/8ca277cf-c208-4562-8929-808b2324b584) -The workers can also be connected to each other, creating a peer to peer network, where the workload is distributed among the workers, in a private, decentralized network. +Workers can also connect to each other in a peer-to-peer network, distributing the workload in a decentralized manner. -A shared token between the server and the workers is needed to let the communication happen via the p2p network. This feature supports both local network (with mdns discovery) and dht for communicating also behind different networks. +A shared token between the server and the workers is required for communication within the peer-to-peer network. This feature supports both local network (using mDNS discovery) and DHT for communication across different networks. -The token is generated automatically when starting the server with the `--p2p` flag, and can be used by starting the workers with `local-ai worker p2p-llama-cpp-rpc` by passing the token via environment variable (TOKEN) or with args (--token). +The token is automatically generated when starting the server with the `--p2p` flag. Workers can be started with the token using `local-ai worker p2p-llama-cpp-rpc` and specifying the token via the environment variable `TOKEN` or with the `--token` argument. -A network is established between the server and the workers with dht and mdns discovery protocols, the llama.cpp rpc server is automatically started and exposed to the underlying p2p network so the API server can connect on. - -When the HTTP server is started, it will discover the workers in the network and automatically create the port-forwards to the service locally. 
Then llama.cpp is configured to use the services. If you are interested in how it works behind the scenes, see the PR: https://github.com/mudler/LocalAI/pull/2343. +A network is established between the server and workers using DHT and mDNS discovery protocols. The llama.cpp RPC server is automatically started and exposed to the peer-to-peer network, allowing the API server to connect. +When the HTTP server starts, it discovers workers in the network and creates port forwards to the local service. Llama.cpp is configured to use these services. For more details on the implementation, refer to [LocalAI pull request #2343](https://github.com/mudler/LocalAI/pull/2343). ### Usage @@ -65,14 +64,14 @@ When the HTTP server is started, it will discover the workers in the network and # 1:02AM INF Press a button to proceed ``` -A token is displayed, copy it and press enter. +Copy the displayed token and press Enter. -You can re-use the same token later restarting the server with `--p2ptoken` (or `P2P_TOKEN`). +To reuse the same token later, restart the server with `--p2ptoken` or `P2P_TOKEN`. -2. Start the workers. Now you can copy the local-ai binary in other hosts, and run as many workers with that token: +2. Start the workers. Copy the `local-ai` binary to other hosts and run as many workers as needed using the token: ```bash -TOKEN=XXX ./local-ai p2p-llama-cpp-rpc +TOKEN=XXX ./local-ai p2p-llama-cpp-rpc # 1:06AM INF loading environment variables from file envFile=.env # 1:06AM INF Setting logging to info # {"level":"INFO","time":"2024-05-19T01:06:01.794+0200","caller":"config/config.go:288","message":"connmanager disabled\n"} @@ -88,14 +87,13 @@ TOKEN=XXX ./local-ai p2p-llama-cpp-rpc # {"level":"INFO","time":"2024-05-19T01:06:01.806+0200","caller":"discovery/dht.go:104","message":" Bootstrapping DHT"} ``` -(Note you can also supply the token via args) +(Note: You can also supply the token via command-line arguments) -At this point, you should see in the server logs messages stating that new workers are found +The server logs should indicate that new workers are being discovered. -3. Now you can start doing inference as usual on the server (the node used on step 1) +3. Start inference as usual on the server initiated in step 1. +## Notes -## Notes - -- Only single model is supported for now -- Make sure that the server sees new workers after usage starts - currently, if you start the inference you can't add other workers later on. \ No newline at end of file +- Only a single model is supported currently. +- Ensure the server detects new workers before starting inference. Currently, additional workers cannot be added once inference has begun. 
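+
+As a closing sketch of step 3 above: once workers are attached, requests go to the server's usual OpenAI-compatible endpoint. The model name below is a placeholder for whatever model is installed.
+
+```bash
+# Standard chat-completion request against the server started with --p2p;
+# "model-name" is a placeholder for an installed model.
+curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+  "model": "model-name",
+  "messages": [{"role": "user", "content": "Hello, how are you?"}]
+}'
+```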
\ No newline at end of file From b90cdced5934fe85f48f7f9942cfbd6f781174e6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 25 May 2024 20:18:25 +0200 Subject: [PATCH 11/80] docs: rewording Signed-off-by: Ettore Di Giacinto --- .../docs/features/constrained_grammars.md | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/docs/content/docs/features/constrained_grammars.md b/docs/content/docs/features/constrained_grammars.md index 9aa9279e..5ffa3a23 100644 --- a/docs/content/docs/features/constrained_grammars.md +++ b/docs/content/docs/features/constrained_grammars.md @@ -1,26 +1,27 @@ - +++ disableToc = false -title = "✍️ Constrained grammars" +title = "✍️ Constrained Grammars" weight = 15 url = "/features/constrained_grammars/" +++ -The chat endpoint accepts an additional `grammar` parameter which takes a [BNF defined grammar](https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form). +## Overview -This allows the LLM to constrain the output to a user-defined schema, allowing to generate `JSON`, `YAML`, and everything that can be defined with a BNF grammar. +The `chat` endpoint supports the `grammar` parameter, which allows users to specify a grammar in Backus-Naur Form (BNF). This feature enables the Large Language Model (LLM) to generate outputs adhering to a user-defined schema, such as `JSON`, `YAML`, or any other format that can be defined using BNF. For more details about BNF, see [Backus-Naur Form on Wikipedia](https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form). {{% alert note %}} -This feature works only with models compatible with the [llama.cpp](https://github.com/ggerganov/llama.cpp) backend (see also [Model compatibility]({{%relref "docs/reference/compatibility-table" %}})). For details on how it works, see the upstream PRs: https://github.com/ggerganov/llama.cpp/pull/1773, https://github.com/ggerganov/llama.cpp/pull/1887 +**Compatibility Notice:** This feature is only supported by models that use the [llama.cpp](https://github.com/ggerganov/llama.cpp) backend. For a complete list of compatible models, refer to the [Model Compatibility](docs/reference/compatibility-table) page. For technical details, see the related pull requests: [PR #1773](https://github.com/ggerganov/llama.cpp/pull/1773) and [PR #1887](https://github.com/ggerganov/llama.cpp/pull/1887). {{% /alert %}} ## Setup -Follow the setup instructions from the [LocalAI functions]({{%relref "docs/features/openai-functions" %}}) page. +To use this feature, follow the installation and setup instructions on the [LocalAI Functions](docs/features/openai-functions) page. Ensure that your local setup meets all the prerequisites specified for the llama.cpp backend. -## 💡 Usage example +## 💡 Usage Example -For example, to constrain the output to either `yes`, `no`: +The following example demonstrates how to use the `grammar` parameter to constrain the model's output to either "yes" or "no". This can be particularly useful in scenarios where the response format needs to be strictly controlled. + +### Example: Binary Response Constraint ```bash curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ @@ -29,3 +30,5 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso "grammar": "root ::= (\"yes\" | \"no\")" }' ``` + +In this example, the `grammar` parameter is set to a simple choice between "yes" and "no", ensuring that the model's response adheres strictly to one of these options regardless of the context. 
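+
+### Example: Digit-Only Response Constraint
+
+Building on the same pattern, the sketch below constrains the reply to a run of digits. The model name and prompt are placeholders; the grammar uses the same BNF dialect accepted by llama.cpp.
+
+```bash
+curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+  "model": "model-name",
+  "messages": [{"role": "user", "content": "How many planets are in the solar system? Reply with digits only."}],
+  "grammar": "root ::= [0-9]+"
+}'
+```
+
+Here `root ::= [0-9]+` accepts one or more digit characters and nothing else, so any surrounding prose is excluded from the completion.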
\ No newline at end of file From 3200a6655e1413bf82fb8c6cdd142f47fdf95125 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Serta=C3=A7=20=C3=96zercan?= <852750+sozercan@users.noreply.github.com> Date: Sun, 26 May 2024 00:56:06 -0700 Subject: [PATCH 12/80] fix: gpu fetch device info (#2403) * fix: gpu fetch device info Signed-off-by: Sertac Ozercan * use pciutils package Signed-off-by: Sertac Ozercan --------- Signed-off-by: Sertac Ozercan --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2dd092d6..15475ed1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -107,7 +107,7 @@ ENV BUILD_TYPE=${BUILD_TYPE} RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \ apt-get update && \ apt-get install -y --no-install-recommends \ - software-properties-common && \ + software-properties-common pciutils && \ curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \ dpkg -i cuda-keyring_1.1-1_all.deb && \ rm -f cuda-keyring_1.1-1_all.deb && \ @@ -355,7 +355,7 @@ RUN mkdir -p /build/models # Define the health check command HEALTHCHECK --interval=1m --timeout=10m --retries=10 \ CMD curl -f ${HEALTHCHECK_ENDPOINT} || exit 1 - + VOLUME /build/models EXPOSE 8080 ENTRYPOINT [ "/build/entrypoint.sh" ] From 480834f75b5c66a31cb72081e966a20a244ff634 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 26 May 2024 10:05:15 +0200 Subject: [PATCH 13/80] :arrow_up: Update ggerganov/whisper.cpp (#2408) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ee58dcbe..58b65f88 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=22d46b7ba4620e2db1281e210d0186863cffcec0 +WHISPER_CPP_VERSION?=a7dc2aab16822b80a6491b0bd4bbf4900404a8a0 # bert.cpp version BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4 From 593fb62bf0eba5e73f4b5a957f9ce4aef95ea773 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 26 May 2024 10:43:50 +0200 Subject: [PATCH 14/80] :arrow_up: Update ggerganov/llama.cpp (#2409) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 58b65f88..0ef42c61 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=d041d2ceaaf50e058622d92921b3e680ffa4e9e7 +CPPLLAMA_VERSION?=9588f196b1d7b21bdff013fcf958c249576b2619 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From db3113c5c831d3b9ab35b97981e83c2c7084893b Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 26 May 2024 14:48:30 +0200 Subject: [PATCH 15/80] fix(watcher): do not emit fatal errors (#2410) Signed-off-by: Ettore Di Giacinto --- core/startup/config_file_watcher.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/startup/config_file_watcher.go b/core/startup/config_file_watcher.go index 259446f1..6a2bdca1 100644 --- 
a/core/startup/config_file_watcher.go +++ b/core/startup/config_file_watcher.go @@ -71,8 +71,7 @@ func (c *configFileHandler) Watch() error { configWatcher, err := fsnotify.NewWatcher() c.watcher = configWatcher if err != nil { - log.Fatal().Err(err).Str("configdir", c.appConfig.DynamicConfigsDir).Msg("unable to create a watcher for configuration directory") - + return err } if c.appConfig.DynamicConfigsDirPollInterval > 0 { From 3280de7adf23757a30c91058503a8c224f576fd2 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 26 May 2024 15:43:31 +0200 Subject: [PATCH 16/80] models(gallery): add Mahou (#2411) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index b43aced1..d7723b41 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -817,6 +817,26 @@ - filename: Llama-3-8B-Instruct-DPO-v0.3.Q4_K_M.gguf sha256: 694c55b5215d03e59626cd4292076eaf31610ef27ba04737166766baa75d889f uri: huggingface://MaziyarPanahi/Llama-3-8B-Instruct-DPO-v0.3-32k-GGUF/Llama-3-8B-Instruct-DPO-v0.3.Q4_K_M.gguf +- url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + name: "mahou-1.2-llama3-8b" + license: llama3 + icon: https://huggingface.co/flammenai/Mahou-1.0-mistral-7B/resolve/main/mahou1.png + urls: + - https://huggingface.co/flammenai/Mahou-1.2-llama3-8B-GGUF + tags: + - llm + - gguf + - gpu + - cpu + - llama3 + overrides: + context_size: 8192 + parameters: + model: Mahou-1.2-llama3-8B-Q4_K_M.gguf + files: + - filename: Mahou-1.2-llama3-8B-Q4_K_M.gguf + sha256: 651b405dff71e4ce80e15cc6d393463f02833428535c56eb6bae113776775d62 + uri: huggingface://flammenai/Mahou-1.2-llama3-8B-GGUF/Mahou-1.2-llama3-8B-Q4_K_M.gguf - &yi-chat ### Start Yi url: "github:mudler/LocalAI/gallery/chatml.yaml@master" From 135208806c7a2f7d886a3d1626bc3b6942d428e4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 26 May 2024 15:58:19 +0200 Subject: [PATCH 17/80] models(gallery): add minicpm (#2412) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index d7723b41..760a1902 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1158,6 +1158,31 @@ - filename: llava-llama-3-8b-v1_1-mmproj-f16.gguf sha256: eb569aba7d65cf3da1d0369610eb6869f4a53ee369992a804d5810a80e9fa035 uri: huggingface://xtuner/llava-llama-3-8b-v1_1-gguf/llava-llama-3-8b-v1_1-mmproj-f16.gguf +- !!merge <<: *llama3 + name: "minicpm-llama3-v-2_5" + urls: + - https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf + - https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5 + description: | + MiniCPM-Llama3-V 2.5 is the latest model in the MiniCPM-V series. 
The model is built on SigLip-400M and Llama3-8B-Instruct with a total of 8B parameters + tags: + - llm + - multimodal + - gguf + - gpu + - llama3 + - cpu + overrides: + mmproj: minicpm-llama3-mmproj-f16.gguf + parameters: + model: minicpm-llama3-Q4_K_M.gguf + files: + - filename: minicpm-llama3-Q4_K_M.gguf + sha256: 010ec3ba94cb5ad2d9c8f95f46f01c6d80f83deab9df0a0831334ea45afff3e2 + uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/minicpm-llama3-Q4_K_M.gguf + - filename: minicpm-llama3-mmproj-f16.gguf + sha256: 391d11736c3cd24a90417c47b0c88975e86918fcddb1b00494c4d715b08af13e + uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/mmproj-model-f16.gguf ### ChatML - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "helpingai-9b" From 6343758f9c93c842c69dd2db6ce48412dc559ca4 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 26 May 2024 19:59:49 +0200 Subject: [PATCH 18/80] models(gallery): add poppy porpoise 0.85 (#2415) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 760a1902..8f57ae28 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1106,6 +1106,34 @@ - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf +- !!merge <<: *llama3 + name: "poppy_porpoise-v0.85-l3-8b-iq-imatrix" + urls: + - https://huggingface.co/Lewdiculous/Poppy_Porpoise-0.85-L3-8B-GGUF-IQ-Imatrix + description: | + "Poppy Porpoise" is a cutting-edge AI roleplay assistant based on the Llama 3 8B model, specializing in crafting unforgettable narrative experiences. With its advanced language capabilities, Poppy expertly immerses users in an interactive and engaging adventure, tailoring each adventure to their individual preferences. + + Update: Vision/multimodal capabilities again! 
+ icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/Boje781GkTdYgORTYGI6r.png + tags: + - llm + - multimodal + - gguf + - gpu + - llama3 + - cpu + - llava-1.5 + overrides: + mmproj: Llama-3-Update-2.0-mmproj-model-f16.gguf + parameters: + model: Poppy_Porpoise-0.85-L3-8B-Q4_K_M-imat.gguf + files: + - filename: Poppy_Porpoise-0.85-L3-8B-Q4_K_M-imat.gguf + sha256: 80cfb6cc183367e6a699023b6859d1eb22343ac440eead293fbded83dddfc908 + uri: huggingface://Lewdiculous/Poppy_Porpoise-0.85-L3-8B-GGUF-IQ-Imatrix/Poppy_Porpoise-0.85-L3-8B-Q4_K_M-imat.gguf + - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf + sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba + uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf - !!merge <<: *llama3 name: "bunny-llama-3-8b-v" urls: From 345047ed7c674999d06acaeb33e607ae9f269b33 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 26 May 2024 20:04:26 +0200 Subject: [PATCH 19/80] models(gallery): add alpha centauri (#2416) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 8f57ae28..bcb38ad2 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1062,6 +1062,33 @@ - filename: LLaMAntino-3-ANITA-8B-Inst-DPO-ITA.Q4_K_M.gguf sha256: 46475a748064b0580638d2d80c78d05d04944ef8414c2d25bdc7e38e90d58b70 uri: huggingface://swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA_GGUF/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA.Q4_K_M.gguf +- !!merge <<: *llama3 + name: "llama-3-alpha-centauri-v0.1" + urls: + - https://huggingface.co/fearlessdots/Llama-3-Alpha-Centauri-v0.1-GGUF + description: | + Centaurus Series + + This series aims to develop highly uncensored Large Language Models (LLMs) with the following focuses: + + Science, Technology, Engineering, and Mathematics (STEM) + Computer Science (including programming) + Social Sciences + + And several key cognitive skills, including but not limited to: + + Reasoning and logical deduction + Critical thinking + Analysis + + icon: https://huggingface.co/fearlessdots/Llama-3-Alpha-Centauri-v0.1-GGUF/resolve/main/alpha_centauri_banner.png + overrides: + parameters: + model: Llama-3-Alpha-Centauri-v0.1.Q4_K_M.gguf + files: + - filename: Llama-3-Alpha-Centauri-v0.1.Q4_K_M.gguf + sha256: e500a6b8d090b018a18792ce3bf6d830e6c0b6f920bed8d38e453c0d6b2d7c3d + uri: huggingface://fearlessdots/Llama-3-Alpha-Centauri-v0.1-GGUF/Llama-3-Alpha-Centauri-v0.1.Q4_K_M.gguf - !!merge <<: *llama3 name: "aurora_l3_8b-iq-imatrix" urls: From 16433d2e8e0d6f0346d6d872f94b6a53b2e6cc33 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Sun, 26 May 2024 13:05:52 -0500 Subject: [PATCH 20/80] fix: install pytorch from proper index for hipblas builds (#2413) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- backend/python/autogptq/requirements-hipblas.txt | 2 ++ backend/python/bark/requirements-hipblas.txt | 3 +++ backend/python/common/template/requirements-hipblas.txt | 2 ++ backend/python/coqui/requirements-hipblas.txt | 3 +++ backend/python/diffusers/requirements-hipblas.txt | 3 +++ backend/python/openvoice/requirements-hipblas.txt | 2 ++ backend/python/parler-tts/requirements-hipblas.txt | 3 +++ backend/python/petals/requirements-hipblas.txt | 2 ++ backend/python/rerankers/requirements-hipblas.txt | 2 ++ backend/python/sentencetransformers/requirements-hipblas.txt | 2 ++ 
backend/python/transformers-musicgen/requirements-hipblas.txt | 2 ++ backend/python/transformers/requirements-hipblas.txt | 2 ++ backend/python/vall-e-x/requirements-hipblas.txt | 3 +++ backend/python/vllm/requirements-hipblas.txt | 2 ++ 14 files changed, 33 insertions(+) create mode 100644 backend/python/autogptq/requirements-hipblas.txt create mode 100644 backend/python/bark/requirements-hipblas.txt create mode 100644 backend/python/common/template/requirements-hipblas.txt create mode 100644 backend/python/coqui/requirements-hipblas.txt create mode 100644 backend/python/diffusers/requirements-hipblas.txt create mode 100644 backend/python/openvoice/requirements-hipblas.txt create mode 100644 backend/python/parler-tts/requirements-hipblas.txt create mode 100644 backend/python/petals/requirements-hipblas.txt create mode 100644 backend/python/rerankers/requirements-hipblas.txt create mode 100644 backend/python/sentencetransformers/requirements-hipblas.txt create mode 100644 backend/python/transformers-musicgen/requirements-hipblas.txt create mode 100644 backend/python/transformers/requirements-hipblas.txt create mode 100644 backend/python/vall-e-x/requirements-hipblas.txt create mode 100644 backend/python/vllm/requirements-hipblas.txt diff --git a/backend/python/autogptq/requirements-hipblas.txt b/backend/python/autogptq/requirements-hipblas.txt new file mode 100644 index 00000000..76018445 --- /dev/null +++ b/backend/python/autogptq/requirements-hipblas.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch \ No newline at end of file diff --git a/backend/python/bark/requirements-hipblas.txt b/backend/python/bark/requirements-hipblas.txt new file mode 100644 index 00000000..7bfc411b --- /dev/null +++ b/backend/python/bark/requirements-hipblas.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch +torchaudio \ No newline at end of file diff --git a/backend/python/common/template/requirements-hipblas.txt b/backend/python/common/template/requirements-hipblas.txt new file mode 100644 index 00000000..76018445 --- /dev/null +++ b/backend/python/common/template/requirements-hipblas.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch \ No newline at end of file diff --git a/backend/python/coqui/requirements-hipblas.txt b/backend/python/coqui/requirements-hipblas.txt new file mode 100644 index 00000000..7bfc411b --- /dev/null +++ b/backend/python/coqui/requirements-hipblas.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch +torchaudio \ No newline at end of file diff --git a/backend/python/diffusers/requirements-hipblas.txt b/backend/python/diffusers/requirements-hipblas.txt new file mode 100644 index 00000000..6c8da20d --- /dev/null +++ b/backend/python/diffusers/requirements-hipblas.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch +torchvision \ No newline at end of file diff --git a/backend/python/openvoice/requirements-hipblas.txt b/backend/python/openvoice/requirements-hipblas.txt new file mode 100644 index 00000000..76018445 --- /dev/null +++ b/backend/python/openvoice/requirements-hipblas.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-hipblas.txt b/backend/python/parler-tts/requirements-hipblas.txt new file mode 100644 index 00000000..7bfc411b --- /dev/null +++ 
b/backend/python/parler-tts/requirements-hipblas.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch +torchaudio \ No newline at end of file diff --git a/backend/python/petals/requirements-hipblas.txt b/backend/python/petals/requirements-hipblas.txt new file mode 100644 index 00000000..0331f106 --- /dev/null +++ b/backend/python/petals/requirements-hipblas.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch diff --git a/backend/python/rerankers/requirements-hipblas.txt b/backend/python/rerankers/requirements-hipblas.txt new file mode 100644 index 00000000..76018445 --- /dev/null +++ b/backend/python/rerankers/requirements-hipblas.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-hipblas.txt b/backend/python/sentencetransformers/requirements-hipblas.txt new file mode 100644 index 00000000..76018445 --- /dev/null +++ b/backend/python/sentencetransformers/requirements-hipblas.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-hipblas.txt b/backend/python/transformers-musicgen/requirements-hipblas.txt new file mode 100644 index 00000000..76018445 --- /dev/null +++ b/backend/python/transformers-musicgen/requirements-hipblas.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch \ No newline at end of file diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt new file mode 100644 index 00000000..76018445 --- /dev/null +++ b/backend/python/transformers/requirements-hipblas.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-hipblas.txt b/backend/python/vall-e-x/requirements-hipblas.txt new file mode 100644 index 00000000..7bfc411b --- /dev/null +++ b/backend/python/vall-e-x/requirements-hipblas.txt @@ -0,0 +1,3 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch +torchaudio \ No newline at end of file diff --git a/backend/python/vllm/requirements-hipblas.txt b/backend/python/vllm/requirements-hipblas.txt new file mode 100644 index 00000000..76018445 --- /dev/null +++ b/backend/python/vllm/requirements-hipblas.txt @@ -0,0 +1,2 @@ +--extra-index-url https://download.pytorch.org/whl/rocm6.0 +torch \ No newline at end of file From 2c8205854872ad4970f3cf26236c90a3d8a6212e Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 26 May 2024 20:11:57 +0200 Subject: [PATCH 21/80] models(gallery): add cream-phi-13b (#2417) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ gallery/phi-3-chat.yaml | 1 + 2 files changed, 15 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index bcb38ad2..2ae17922 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1457,6 +1457,20 @@ - filename: "Phi-3-medium-4k-instruct-Q4_K_M.gguf" uri: "huggingface://bartowski/Phi-3-medium-4k-instruct-GGUF/Phi-3-medium-4k-instruct-Q4_K_M.gguf" sha256: 4e8d4258ed44562573c8984a045b0a4651c51e7e4d9d00a06c65cd2149ab4539 +- !!merge <<: *phi-3 + name: "cream-phi-3-14b-v1" + icon: https://cdn-uploads.huggingface.co/production/uploads/65f2fd1c25b848bd061b5c2e/AP4-OHepdqiqHj2KSi26M.gif + description: | + CreamPhi 14B is the first Phi Medium to be 
trained with roleplay and moist. + urls: + - https://huggingface.co/TheDrummer/Cream-Phi-3-14B-v1-GGUF + overrides: + parameters: + model: Cream-Phi-3-14B-v1-Q4_K_M.gguf + files: + - filename: Cream-Phi-3-14B-v1-Q4_K_M.gguf + uri: huggingface://TheDrummer/Cream-Phi-3-14B-v1-GGUF/Cream-Phi-3-14B-v1-Q4_K_M.gguf + sha256: ec67018a86090da415517acf21ad48f28e02dff664a1dd35602f1f8fa94f6a27 - &hermes-2-pro-mistral ### START Hermes url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" diff --git a/gallery/phi-3-chat.yaml b/gallery/phi-3-chat.yaml index ede4fd0f..98a3f385 100644 --- a/gallery/phi-3-chat.yaml +++ b/gallery/phi-3-chat.yaml @@ -16,3 +16,4 @@ config_file: | f16: true stopwords: - <|end|> + - <|endoftext|> From ff1f9125ed1e391b33dbffe75df56cbca9d17a75 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 26 May 2024 20:12:40 +0200 Subject: [PATCH 22/80] models(gallery): add stheno-mahou (#2418) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 2ae17922..519d23ed 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -311,6 +311,20 @@ - filename: l3-8b-stheno-v3.1.Q4_K_M.gguf sha256: f166fb8b7fd1de6638fcf8e3561c99292f0c37debe1132325aa583eef78f1b40 uri: huggingface://mudler/L3-8B-Stheno-v3.1-Q4_K_M-GGUF/l3-8b-stheno-v3.1.Q4_K_M.gguf +- !!merge <<: *llama3 + name: "llama-3-stheno-mahou-8b" + urls: + - https://huggingface.co/mudler/llama-3-Stheno-Mahou-8B-Q4_K_M-GGUF + - https://huggingface.co/nbeerbower/llama-3-Stheno-Mahou-8B + description: | + This model was merged using the Model Stock merge method using flammenai/Mahou-1.2-llama3-8B as a base. + overrides: + parameters: + model: llama-3-stheno-mahou-8b-q4_k_m.gguf + files: + - filename: llama-3-stheno-mahou-8b-q4_k_m.gguf + sha256: a485cd74ef4ff3671c67ed8e10ea5379a1f24082ac688bd303fd28dfc9808c11 + uri: huggingface://mudler/llama-3-Stheno-Mahou-8B-Q4_K_M-GGUF/llama-3-stheno-mahou-8b-q4_k_m.gguf - !!merge <<: *llama3 name: "llama-3-8b-openhermes-dpo" urls: From ba984c70975f0c08b6f5b4b797a46c4ea6697562 Mon Sep 17 00:00:00 2001 From: cryptk <421501+cryptk@users.noreply.github.com> Date: Sun, 26 May 2024 13:27:07 -0500 Subject: [PATCH 23/80] fix: pin version of setuptools for intel builds to work around #2406 (#2414) Signed-off-by: Chris Jowett <421501+cryptk@users.noreply.github.com> --- backend/python/autogptq/requirements-intel.txt | 2 +- backend/python/bark/requirements-intel.txt | 2 +- backend/python/coqui/requirements-intel.txt | 2 +- backend/python/diffusers/requirements-intel.txt | 2 +- backend/python/parler-tts/requirements-intel.txt | 2 +- backend/python/petals/requirements-intel.txt | 2 +- backend/python/rerankers/requirements-intel.txt | 2 +- backend/python/sentencetransformers/requirements-intel.txt | 2 +- backend/python/transformers-musicgen/requirements-intel.txt | 2 +- backend/python/transformers/requirements-intel.txt | 2 +- backend/python/vall-e-x/requirements-intel.txt | 2 +- backend/python/vllm/requirements-intel.txt | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/backend/python/autogptq/requirements-intel.txt b/backend/python/autogptq/requirements-intel.txt index cec8bff4..95d4848c 100644 --- a/backend/python/autogptq/requirements-intel.txt +++ b/backend/python/autogptq/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # 
https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/bark/requirements-intel.txt b/backend/python/bark/requirements-intel.txt index 54b3900d..e6b4afc0 100644 --- a/backend/python/bark/requirements-intel.txt +++ b/backend/python/bark/requirements-intel.txt @@ -3,4 +3,4 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/coqui/requirements-intel.txt b/backend/python/coqui/requirements-intel.txt index 54b3900d..e6b4afc0 100644 --- a/backend/python/coqui/requirements-intel.txt +++ b/backend/python/coqui/requirements-intel.txt @@ -3,4 +3,4 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/diffusers/requirements-intel.txt b/backend/python/diffusers/requirements-intel.txt index 7d048246..3637b322 100644 --- a/backend/python/diffusers/requirements-intel.txt +++ b/backend/python/diffusers/requirements-intel.txt @@ -3,4 +3,4 @@ intel-extension-for-pytorch torch torchvision optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/parler-tts/requirements-intel.txt b/backend/python/parler-tts/requirements-intel.txt index 54b3900d..e6b4afc0 100644 --- a/backend/python/parler-tts/requirements-intel.txt +++ b/backend/python/parler-tts/requirements-intel.txt @@ -3,4 +3,4 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/petals/requirements-intel.txt b/backend/python/petals/requirements-intel.txt index cec8bff4..95d4848c 100644 --- a/backend/python/petals/requirements-intel.txt +++ b/backend/python/petals/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/rerankers/requirements-intel.txt b/backend/python/rerankers/requirements-intel.txt index cec8bff4..95d4848c 100644 --- a/backend/python/rerankers/requirements-intel.txt +++ b/backend/python/rerankers/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/sentencetransformers/requirements-intel.txt b/backend/python/sentencetransformers/requirements-intel.txt index cec8bff4..95d4848c 100644 --- a/backend/python/sentencetransformers/requirements-intel.txt +++ b/backend/python/sentencetransformers/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/transformers-musicgen/requirements-intel.txt b/backend/python/transformers-musicgen/requirements-intel.txt index cec8bff4..95d4848c 100644 --- a/backend/python/transformers-musicgen/requirements-intel.txt +++ 
b/backend/python/transformers-musicgen/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/transformers/requirements-intel.txt b/backend/python/transformers/requirements-intel.txt index cec8bff4..95d4848c 100644 --- a/backend/python/transformers/requirements-intel.txt +++ b/backend/python/transformers/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/vall-e-x/requirements-intel.txt b/backend/python/vall-e-x/requirements-intel.txt index 54b3900d..e6b4afc0 100644 --- a/backend/python/vall-e-x/requirements-intel.txt +++ b/backend/python/vall-e-x/requirements-intel.txt @@ -3,4 +3,4 @@ intel-extension-for-pytorch torch torchaudio optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file diff --git a/backend/python/vllm/requirements-intel.txt b/backend/python/vllm/requirements-intel.txt index cec8bff4..95d4848c 100644 --- a/backend/python/vllm/requirements-intel.txt +++ b/backend/python/vllm/requirements-intel.txt @@ -2,4 +2,4 @@ intel-extension-for-pytorch torch optimum[openvino] -setuptools \ No newline at end of file +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 \ No newline at end of file From e9c28a1ed7eef43ac5266029de5d9b3033c0103c Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 26 May 2024 23:32:05 +0200 Subject: [PATCH 24/80] :arrow_up: Update ggerganov/llama.cpp (#2419) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0ef42c61..e79a3c9c 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=9588f196b1d7b21bdff013fcf958c249576b2619 +CPPLLAMA_VERSION?=dff451cfa1f297348751ce6b538670e1ae9a7d5b # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From eaf653f3d300244970aade30be03d2e0c8ced346 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 27 May 2024 17:17:04 +0200 Subject: [PATCH 25/80] models(gallery): add iterative-dpo, fix minicpm (#2422) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 519d23ed..688312bc 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -779,6 +779,21 @@ - filename: Tess-2.0-Llama-3-8B-Q4_K_M.gguf sha256: 3b5fbd6c59d7d38205ab81970c0227c74693eb480acf20d8c2f211f62e3ca5f6 uri: huggingface://bartowski/Tess-2.0-Llama-3-8B-GGUF/Tess-2.0-Llama-3-8B-Q4_K_M.gguf +- !!merge <<: *llama3 + name: "llama3-iterative-dpo-final" + urls: + - https://huggingface.co/bartowski/LLaMA3-iterative-DPO-final-GGUF + - https://huggingface.co/RLHFlow/LLaMA3-iterative-DPO-final + description: | + From model card: + We release an unofficial checkpoint of a state-of-the-art instruct model of its class, 
LLaMA3-iterative-DPO-final. On all three widely-used instruct model benchmarks: Alpaca-Eval-V2, MT-Bench, Chat-Arena-Hard, our model outperforms all models of similar size (e.g., LLaMA-3-8B-it), most large open-sourced models (e.g., Mixtral-8x7B-it), and strong proprietary models (e.g., GPT-3.5-turbo-0613). The model is trained with open-sourced datasets without any additional human-/GPT4-labeling. + overrides: + parameters: + model: LLaMA3-iterative-DPO-final-Q4_K_M.gguf + files: + - filename: LLaMA3-iterative-DPO-final-Q4_K_M.gguf + sha256: 480703ff85af337e1db2a9d9a678a3ac8ca0802e366b14d9c59b81d3fc689da8 + uri: huggingface://bartowski/LLaMA3-iterative-DPO-final-GGUF/LLaMA3-iterative-DPO-final-Q4_K_M.gguf - &dolphin name: "dolphin-2.9-llama3-8b" url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master" @@ -1248,7 +1263,7 @@ files: - filename: minicpm-llama3-Q4_K_M.gguf sha256: 010ec3ba94cb5ad2d9c8f95f46f01c6d80f83deab9df0a0831334ea45afff3e2 - uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/minicpm-llama3-Q4_K_M.gguf + uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/ggml-model-Q4_K_M.gguf - filename: minicpm-llama3-mmproj-f16.gguf sha256: 391d11736c3cd24a90417c47b0c88975e86918fcddb1b00494c4d715b08af13e uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/mmproj-model-f16.gguf From be8ffbdfcfbf4d7a848ce670e94f37858ad788ca Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 27 May 2024 17:23:34 +0200 Subject: [PATCH 26/80] ci(grpc-cache): also arm64 (#2423) grpc-cache: also arm64 Signed-off-by: Ettore Di Giacinto --- .github/workflows/generate_grpc_cache.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/generate_grpc_cache.yaml b/.github/workflows/generate_grpc_cache.yaml index 3d80b967..fa5ccf20 100644 --- a/.github/workflows/generate_grpc_cache.yaml +++ b/.github/workflows/generate_grpc_cache.yaml @@ -17,7 +17,7 @@ jobs: include: - grpc-base-image: ubuntu:22.04 runs-on: 'ubuntu-latest' - platforms: 'linux/amd64' + platforms: 'linux/amd64,linux/arm64' runs-on: ${{matrix.runs-on}} steps: - name: Release space from worker From d075dc44ddf876f764912f450ad3c198075ec4b1 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 27 May 2024 22:07:35 +0200 Subject: [PATCH 27/80] ci: push test images when building PRs (#2424) ci: try to push image Signed-off-by: Ettore Di Giacinto --- .github/workflows/image_build.yml | 51 +++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 167a8fef..96cd5992 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -136,6 +136,7 @@ jobs: - name: Docker meta id: meta + if: github.event_name != 'pull_request' uses: docker/metadata-action@v5 with: images: | @@ -148,7 +149,20 @@ jobs: flavor: | latest=${{ inputs.tag-latest }} suffix=${{ inputs.tag-suffix }} - + - name: Docker meta for PR + id: meta_pull_request + if: github.event_name == 'pull_request' + uses: docker/metadata-action@v5 + with: + images: | + ttl.sh/localai-ci-pr-${{ github.event.number }} + tags: | + type=ref,event=branch + type=semver,pattern={{raw}} + type=sha + flavor: | + latest=${{ inputs.tag-latest }} + suffix=${{ inputs.tag-suffix }} - name: Docker meta AIO (quay.io) if: inputs.aio != '' id: meta_aio @@ -202,6 +216,7 @@ jobs: - name: Build and push uses: docker/build-push-action@v5 + if: github.event_name != 'pull_request' with: builder: ${{ steps.buildx.outputs.name }} # The 
build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache. @@ -226,7 +241,39 @@ jobs: push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - +### Start testing image + - name: Build and push + uses: docker/build-push-action@v5 + if: github.event_name == 'pull_request' + with: + builder: ${{ steps.buildx.outputs.name }} + # The build-args MUST be an EXACT match between the image cache and other workflow steps that want to use that cache. + # This means that even the MAKEFLAGS have to be an EXACT match. + # If the build-args are not an EXACT match, it will result in a cache miss, which will require GRPC to be built from scratch. + # This is why some build args like GRPC_VERSION and MAKEFLAGS are hardcoded + build-args: | + BUILD_TYPE=${{ inputs.build-type }} + CUDA_MAJOR_VERSION=${{ inputs.cuda-major-version }} + CUDA_MINOR_VERSION=${{ inputs.cuda-minor-version }} + FFMPEG=${{ inputs.ffmpeg }} + IMAGE_TYPE=${{ inputs.image-type }} + BASE_IMAGE=${{ inputs.base-image }} + GRPC_BASE_IMAGE=${{ inputs.grpc-base-image || inputs.base-image }} + GRPC_MAKEFLAGS=--jobs=4 --output-sync=target + GRPC_VERSION=v1.64.0 + MAKEFLAGS=${{ inputs.makeflags }} + context: . + file: ./Dockerfile + cache-from: type=gha + platforms: ${{ inputs.platforms }} + push: true + tags: ${{ steps.meta_pull_request.outputs.tags }} + labels: ${{ steps.meta_pull_request.outputs.labels }} + - name: Testing image + if: github.event_name == 'pull_request' + run: | + echo "Image is available at ttl.sh/localai-ci-pr-${{ github.event.number }}:${{ steps.meta_pull_request.outputs.version }}" >> $GITHUB_STEP_SUMMARY +## End testing image - name: Build and push AIO image if: inputs.aio != '' uses: docker/build-push-action@v5 From 9f5c274321c0b76ed543106a7e110e3085278919 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 27 May 2024 22:07:48 +0200 Subject: [PATCH 28/80] feat(images): do not install python deps in the core image (#2425) do not install python deps in the core image Signed-off-by: Ettore Di Giacinto --- Dockerfile | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index 15475ed1..e9653f55 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,12 +24,9 @@ RUN apt-get update && \ cmake \ curl \ git \ - python3-pip \ - python-is-python3 \ unzip && \ apt-get clean && \ - rm -rf /var/lib/apt/lists/* && \ - pip install --upgrade pip + rm -rf /var/lib/apt/lists/* # Install Go RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | tar -C /usr/local -xz @@ -39,9 +36,6 @@ ENV PATH $PATH:/root/go/bin:/usr/local/go/bin RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest && \ go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest -# Install grpcio-tools (the version in 22.04 is too old) -RUN pip install --user grpcio-tools - COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/ RUN update-ca-certificates @@ -85,10 +79,16 @@ RUN apt-get update && \ apt-get install -y --no-install-recommends \ espeak-ng \ espeak \ + python3-pip \ + python-is-python3 \ python3-dev \ python3-venv && \ apt-get clean && \ - rm -rf /var/lib/apt/lists/* + rm -rf /var/lib/apt/lists/* && \ + pip install --upgrade pip + +# Install grpcio-tools (the version in 22.04 is too old) +RUN pip install --user grpcio-tools ################################### ################################### From 
10430a00bda3511a0141122d9a2cc04649cce2a7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 27 May 2024 22:35:11 +0200 Subject: [PATCH 29/80] feat(hipblas): extend default hipblas GPU_TARGETS (#2426) Makefile: extend default hipblas GPU_TARGETS Signed-off-by: Ettore Di Giacinto --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e79a3c9c..403550d6 100644 --- a/Makefile +++ b/Makefile @@ -112,7 +112,7 @@ ifeq ($(BUILD_TYPE),hipblas) # llama-ggml has no hipblas support, so override it here. export STABLE_BUILD_TYPE= export WHISPER_HIPBLAS=1 - GPU_TARGETS ?= gfx900,gfx90a,gfx1030,gfx1031,gfx1100 + GPU_TARGETS ?= gfx900,gfx906,gfx908,gfx940,gfx941,gfx942,gfx90a,gfx1030,gfx1031,gfx1100,gfx1101 AMDGPU_TARGETS ?= "$(GPU_TARGETS)" CMAKE_ARGS+=-DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)" CGO_LDFLAGS += -O3 --rtlib=compiler-rt -unwindlib=libgcc -lhipblas -lrocblas --hip-link -L${ROCM_HOME}/lib/llvm/lib From 1c80f628ffd2ee8a538327b413a1c59ff42af7e5 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Mon, 27 May 2024 23:28:36 +0200 Subject: [PATCH 30/80] :arrow_up: Update ggerganov/whisper.cpp (#2427) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 403550d6..fc158134 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=a7dc2aab16822b80a6491b0bd4bbf4900404a8a0 +WHISPER_CPP_VERSION?=c7b6988678779901d02ceba1a8212d2c9908956e # bert.cpp version BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4 From 577888f3c07fd42cd52eb9c775693eff09e4a35c Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 28 May 2024 00:02:49 +0200 Subject: [PATCH 31/80] :arrow_up: Update ggerganov/llama.cpp (#2428) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fc158134..843407f4 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=dff451cfa1f297348751ce6b538670e1ae9a7d5b +CPPLLAMA_VERSION?=10b1e4587670feba2c7730a645accf8234873113 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 2bbc52fcc8672d142874ab8b44c88e587c935c17 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 28 May 2024 10:34:59 +0200 Subject: [PATCH 32/80] feat(build): add arm64 core containers (#2421) ci: add arm64 container images Signed-off-by: Ettore Di Giacinto --- .github/workflows/image.yml | 2 +- Dockerfile | 40 ++++++++++++++++++++++++++++++++++--- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 6ce90b1f..15b2693c 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -260,7 +260,7 @@ jobs: matrix: include: - build-type: '' - platforms: 'linux/amd64' + platforms: 'linux/amd64,linux/arm64' tag-latest: 'auto' tag-suffix: 
'-ffmpeg-core' ffmpeg: 'true' diff --git a/Dockerfile b/Dockerfile index e9653f55..74e97934 100644 --- a/Dockerfile +++ b/Dockerfile @@ -104,6 +104,31 @@ ARG CUDA_MINOR_VERSION=7 ENV BUILD_TYPE=${BUILD_TYPE} # CuBLAS requirements +RUN < Date: Tue, 28 May 2024 21:06:09 +0200 Subject: [PATCH 33/80] feat(functions): allow parallel calls with mixed/no grammars (#2432) Signed-off-by: Ettore Di Giacinto --- core/http/endpoints/openai/chat.go | 10 ++- pkg/functions/parse.go | 136 ++++++++++++++++++----------- pkg/functions/parse_test.go | 43 +++++++++ 3 files changed, 134 insertions(+), 55 deletions(-) diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 341dc34b..b2e7aa75 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -25,7 +25,7 @@ import ( // @Success 200 {object} schema.OpenAIResponse "Response" // @Router /v1/chat/completions [post] func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startupOptions *config.ApplicationConfig) func(c *fiber.Ctx) error { - emptyMessage := "" + textContentToReturn := "" id := uuid.New().String() created := int(time.Now().Unix()) @@ -34,7 +34,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup ID: id, Created: created, Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. - Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}}, + Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &textContentToReturn}}}, Object: "chat.completion.chunk", } responses <- initialMessage @@ -69,6 +69,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup result = functions.CleanupLLMResult(result, config.FunctionsConfig) results := functions.ParseFunctionCall(result, config.FunctionsConfig) + textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig) noActionToRun := len(results) > 0 && results[0].Name == noAction || len(results) == 0 switch { @@ -77,7 +78,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup ID: id, Created: created, Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. 
- Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}}, + Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &textContentToReturn}}}, Object: "chat.completion.chunk", } responses <- initialMessage @@ -449,7 +450,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup { FinishReason: finishReason, Index: 0, - Delta: &schema.Message{Content: &emptyMessage}, + Delta: &schema.Message{Content: &textContentToReturn}, }}, Object: "chat.completion.chunk", Usage: *usage, @@ -473,6 +474,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup s = functions.CleanupLLMResult(s, config.FunctionsConfig) results := functions.ParseFunctionCall(s, config.FunctionsConfig) + textContentToReturn = functions.ParseTextContent(s, config.FunctionsConfig) noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0 switch { diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go index d6e9d320..7bb3e6bd 100644 --- a/pkg/functions/parse.go +++ b/pkg/functions/parse.go @@ -3,6 +3,7 @@ package functions import ( "encoding/json" "regexp" + "strings" "github.com/go-skynet/LocalAI/pkg/utils" "github.com/rs/zerolog/log" @@ -59,6 +60,11 @@ type FunctionsConfig struct { // ReplaceLLMResult allow to replace strings in the results before parsing them ReplaceLLMResult []ReplaceResult `yaml:"replace_llm_results"` + // CaptureLLMResult is a regex to extract a string from the LLM response + // that is used as return string when using tools. + // This is useful for e.g. if the LLM outputs a reasoning and we want to get the reasoning as a string back + CaptureLLMResult []string `yaml:"capture_llm_results"` + // FunctionName enable the LLM to return { "name": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } } // instead of { "function": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }. // This might be useful for certain models trained with the function name as the first token. @@ -109,6 +115,20 @@ func CleanupLLMResult(llmresult string, functionConfig FunctionsConfig) string { return llmresult } +func ParseTextContent(llmresult string, functionConfig FunctionsConfig) string { + for _, r := range functionConfig.CaptureLLMResult { + // We use a regex to extract the JSON object from the response + var respRegex = regexp.MustCompile(r) + match := respRegex.FindStringSubmatch(llmresult) + if len(match) >= 1 { + m := strings.TrimSpace(match[1]) + return m + } + } + + return "" +} + func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncCallResults { log.Debug().Msgf("LLM result: %s", llmresult) @@ -127,47 +147,52 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC } results := []FuncCallResults{} + llmResults := []string{} - returnResult := func(s string) (result []FuncCallResults, e error) { + returnResult := func(results []string) (result []FuncCallResults, e error) { // As we have to change the result before processing, we can't stream the answer token-by-token (yet?) 
- var ss []map[string]interface{} result = make([]FuncCallResults, 0) - s = utils.EscapeNewLines(s) - err := json.Unmarshal([]byte(s), &ss) - if err != nil { - // If the LLM result is a single object, try unmarshaling it into a single map - var singleObj map[string]interface{} - err = json.Unmarshal([]byte(s), &singleObj) + + for _, s := range results { + var ss []map[string]interface{} + + s = utils.EscapeNewLines(s) + err := json.Unmarshal([]byte(s), &ss) if err != nil { - log.Debug().Err(err).Str("escapedLLMResult", s).Msg("unable to unmarshal llm result in a single object or an array of JSON objects") - } else { - ss = []map[string]interface{}{singleObj} - } - } - - log.Debug().Msgf("Function return: %s %+v", s, ss) - - for _, s := range ss { - // The grammar defines the function name as "function", while OpenAI returns "name" - func_name, ok := s[functionNameKey] - if !ok { - continue - //return result, fmt.Errorf("unable to find function name in result") - } - // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object - args, ok := s["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix) - if !ok { - continue - //return result, fmt.Errorf("unable to find arguments in result") - } - d, _ := json.Marshal(args) - funcName, ok := func_name.(string) - if !ok { - continue - //return result, fmt.Errorf("unable to cast function name to string") + // If the LLM result is a single object, try unmarshaling it into a single map + var singleObj map[string]interface{} + err = json.Unmarshal([]byte(s), &singleObj) + if err != nil { + log.Debug().Err(err).Str("escapedLLMResult", s).Msg("unable to unmarshal llm result in a single object or an array of JSON objects") + } else { + ss = []map[string]interface{}{singleObj} + } } - result = append(result, FuncCallResults{Name: funcName, Arguments: string(d)}) + log.Debug().Msgf("Function return: %s %+v", s, ss) + + for _, s := range ss { + // The grammar defines the function name as "function", while OpenAI returns "name" + func_name, ok := s[functionNameKey] + if !ok { + continue + //return result, fmt.Errorf("unable to find function name in result") + } + // Similarly, while here arguments is a map[string]interface{}, OpenAI actually want a stringified object + args, ok := s["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix) + if !ok { + continue + //return result, fmt.Errorf("unable to find arguments in result") + } + d, _ := json.Marshal(args) + funcName, ok := func_name.(string) + if !ok { + continue + //return result, fmt.Errorf("unable to cast function name to string") + } + + result = append(result, FuncCallResults{Name: funcName, Arguments: string(d)}) + } } return result, nil @@ -179,10 +204,16 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC for _, r := range functionConfig.JSONRegexMatch { // We use a regex to extract the JSON object from the response var respRegex = regexp.MustCompile(r) - match := respRegex.FindStringSubmatch(llmresult) - if len(match) >= 2 { - llmresult = match[1] - log.Debug().Msgf("LLM result(JSONRegexMatch): %s", llmresult) + match := respRegex.FindAllStringSubmatch(llmresult, -1) + var allMatches []string + for _, m := range match { + if len(m) > 1 { + // we match the first group + allMatches = append(allMatches, m[1]) + } + } + if len(allMatches) > 0 { + llmResults = append(llmResults, allMatches...) 
break } } @@ -193,22 +224,25 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC // obviously, this expects the LLM to be stable and return correctly formatted JSON // TODO: optimize this and pre-compile it var respRegex = regexp.MustCompile(functionConfig.ResponseRegex) - match := respRegex.FindStringSubmatch(llmresult) - for i, name := range respRegex.SubexpNames() { - if i != 0 && name != "" && len(match) > i { - result[name] = match[i] + matches := respRegex.FindAllStringSubmatch(llmresult, -1) + for _, match := range matches { + for i, name := range respRegex.SubexpNames() { + if i != 0 && name != "" && len(match) > i { + result[name] = match[i] + } } - } - // TODO: open point about multiple results and/or mixed with chat messages - // This is not handled as for now, we only expect one function call per response - functionName := result[functionNameKey] - if functionName == "" { - return results + functionName := result[functionNameKey] + if functionName == "" { + return results + } + results = append(results, FuncCallResults{Name: result[functionNameKey], Arguments: result["arguments"]}) } - results = append(results, FuncCallResults{Name: result[functionNameKey], Arguments: result["arguments"]}) } else { - results, _ = returnResult(llmresult) + if len(llmResults) == 0 { + llmResults = append(llmResults, llmresult) + } + results, _ = returnResult(llmResults) } return results diff --git a/pkg/functions/parse_test.go b/pkg/functions/parse_test.go index 5e266c50..01d8469f 100644 --- a/pkg/functions/parse_test.go +++ b/pkg/functions/parse_test.go @@ -215,5 +215,48 @@ Some text after the JSON Expect(results[0].Name).To(Equal("\"add\"")) Expect(results[0].Arguments).To(Equal(`{"x":5,"y":"v\"value\"","z":"\"v\""}`)) }) + + It("should detect multiple functions call where the JSONRegexMatch is repeated", func() { + input := ` +Some text before the JSON +{"function": "add", "arguments": {"x": 5, "y": 3}} +{"function": "subtract", "arguments": {"x": 10, "y": 7}} +Some text after the JSON +` + functionConfig.JSONRegexMatch = []string{`(?s)(.*?)`} + + results := ParseFunctionCall(input, functionConfig) + Expect(results).To(HaveLen(2)) + Expect(results[0].Name).To(Equal("add")) + Expect(results[0].Arguments).To(Equal(`{"x":5,"y":3}`)) + Expect(results[1].Name).To(Equal("subtract")) + Expect(results[1].Arguments).To(Equal(`{"x":10,"y":7}`)) + }) + }) + Context("ParseTextContent", func() { + It("Can extract notes from the LLM result", func() { + input := ` + Some text before the JSON + +roses are red + + {"function": "subtract", "arguments": {"x": 10, "y": 7}} + Some text after the JSON + ` + functionConfig.CaptureLLMResult = []string{`(?s)(.*?)`} + results := ParseTextContent(input, functionConfig) + Expect(results).To(Equal("roses are red")) + }) + + It("Defaults to empty if doesn't catch any", func() { + input := ` + Some text before the JSON + {"function": "subtract", "arguments": {"x": 10, "y": 7}} + Some text after the JSON + ` + functionConfig.CaptureLLMResult = []string{`(?s)(.*?)`} + results := ParseTextContent(input, functionConfig) + Expect(results).To(Equal("")) + }) }) }) From 0b99be73b3aadb8220902679af58cf7d1106d9c7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 28 May 2024 23:13:28 +0200 Subject: [PATCH 34/80] models(gallery): add una-thepitbull (#2435) Signed-off-by: Ettore Di Giacinto --- gallery/chatml.yaml | 1 + gallery/index.yaml | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/gallery/chatml.yaml 
b/gallery/chatml.yaml index 2d4effe8..94576f82 100644 --- a/gallery/chatml.yaml +++ b/gallery/chatml.yaml @@ -37,3 +37,4 @@ config_file: | stopwords: - '<|im_end|>' - '' + - '' diff --git a/gallery/index.yaml b/gallery/index.yaml index 688312bc..88f84215 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1268,6 +1268,30 @@ sha256: 391d11736c3cd24a90417c47b0c88975e86918fcddb1b00494c4d715b08af13e uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/mmproj-model-f16.gguf ### ChatML +- &chatml + url: "github:mudler/LocalAI/gallery/chatml.yaml@master" + name: "una-thepitbull-21.4b-v2" + license: afl-3.0 + icon: https://huggingface.co/fblgit/UNA-ThePitbull-21.4B-v2/resolve/main/DE-UNA-ThePitbull-21.4B-v2.png + description: | + Introducing the best LLM in the industry. Nearly as good as a 70B, just a 21.4B based on saltlux/luxia-21.4b-alignment-v1.0 UNA - ThePitbull 21.4B v2 + urls: + - https://huggingface.co/fblgit/UNA-ThePitbull-21.4B-v2 + - https://huggingface.co/bartowski/UNA-ThePitbull-21.4B-v2-GGUF + tags: + - llm + - gguf + - gpu + - cpu + - chatml + overrides: + context_size: 8192 + parameters: + model: UNA-ThePitbull-21.4B-v2-Q4_K_M.gguf + files: + - filename: UNA-ThePitbull-21.4B-v2-Q4_K_M.gguf + sha256: f08780986748a04e707a63dcac616330c2afc7f9fb2cc6b1d9784672071f3c85 + uri: huggingface://bartowski/UNA-ThePitbull-21.4B-v2-GGUF/UNA-ThePitbull-21.4B-v2-Q4_K_M.gguf - url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "helpingai-9b" license: hsul From 7064697ce5975a455ddc4e08c29f6e513ff479aa Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 28 May 2024 23:13:50 +0200 Subject: [PATCH 35/80] models(gallery): add halu (#2434) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 88f84215..eb13ad26 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -426,6 +426,22 @@ - filename: Chaos_RP_l3_8B-Q4_K_M-imat.gguf uri: huggingface://Lewdiculous/Chaos_RP_l3_8B-GGUF-IQ-Imatrix/Chaos_RP_l3_8B-Q4_K_M-imat.gguf sha256: 5774595ad560e4d258dac17723509bdefe746c4dacd4e679a0de00346f14d2f3 +- !!merge <<: *llama3 + name: "halu-8b-llama3-blackroot-iq-imatrix" + urls: + - https://huggingface.co/mudler/Halu-8B-Llama3-Blackroot-Q4_K_M-GGUF + - https://huggingface.co/Hastagaras/Halu-8B-Llama3-Blackroot + icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/VrPS-vHo505LUycJRscD6.png + description: | + Model card: + I don't know what to say about this model... this model is very strange...Maybe because Blackroot's amazing Loras used human data and not synthetic data, hence the model turned out to be very human-like...even the actions or narrations. 
+ overrides: + parameters: + model: halu-8b-llama3-blackroot-q4_k_m.gguf + files: + - filename: halu-8b-llama3-blackroot-q4_k_m.gguf + uri: huggingface://mudler/Halu-8B-Llama3-Blackroot-Q4_K_M-GGUF/halu-8b-llama3-blackroot-q4_k_m.gguf + sha256: 6304c7abadb9c5197485e8b4373b7ed22d9838d5081cd134c4fee823f88ac403 - !!merge <<: *llama3 name: "jsl-medllama-3-8b-v2.0" license: cc-by-nc-nd-4.0 From 087bceccac4d49112da16f9e88fb87265966a1bb Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 28 May 2024 23:55:03 +0200 Subject: [PATCH 36/80] :arrow_up: Update ggerganov/llama.cpp (#2433) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 843407f4..bda87a63 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=10b1e4587670feba2c7730a645accf8234873113 +CPPLLAMA_VERSION?=02c1ecad07f0e2d2febe8196271bcc64bdc9c006 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 4d98dd9ce7e4d3a5eb37c3f37fef0a61710beb9e Mon Sep 17 00:00:00 2001 From: Prajwal S Nayak Date: Wed, 29 May 2024 18:10:54 +0530 Subject: [PATCH 37/80] feat(image): support `response_type` in the OpenAI API request (#2347) * Change response_format type to string to match OpenAI Spec Signed-off-by: prajwal * updated response_type type to interface Signed-off-by: prajwal * feat: correctly parse generic struct Signed-off-by: mudler * add tests Signed-off-by: mudler --------- Signed-off-by: prajwal Signed-off-by: mudler Co-authored-by: Ettore Di Giacinto Co-authored-by: mudler --- core/config/backend_config.go | 8 +++++--- core/http/endpoints/openai/chat.go | 9 +++++++-- core/http/endpoints/openai/completion.go | 9 +++++++-- core/http/endpoints/openai/image.go | 6 ++---- core/http/endpoints/openai/request.go | 9 +++++++++ core/schema/openai.go | 4 +++- tests/e2e-aio/e2e_test.go | 25 +++++++++++++++++++++++- 7 files changed, 57 insertions(+), 13 deletions(-) diff --git a/core/config/backend_config.go b/core/config/backend_config.go index a4979233..eda66360 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -27,9 +27,11 @@ type BackendConfig struct { Backend string `yaml:"backend"` TemplateConfig TemplateConfig `yaml:"template"` - PromptStrings, InputStrings []string `yaml:"-"` - InputToken [][]int `yaml:"-"` - functionCallString, functionCallNameString string `yaml:"-"` + PromptStrings, InputStrings []string `yaml:"-"` + InputToken [][]int `yaml:"-"` + functionCallString, functionCallNameString string `yaml:"-"` + ResponseFormat string `yaml:"-"` + ResponseFormatMap map[string]interface{} `yaml:"-"` FunctionsConfig functions.FunctionsConfig `yaml:"function"` diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index b2e7aa75..6b4899a5 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -183,8 +183,13 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup noActionDescription = config.FunctionsConfig.NoActionDescriptionName } - if input.ResponseFormat.Type == "json_object" { - input.Grammar = functions.JSONBNF + if config.ResponseFormatMap != nil { + d := schema.ChatCompletionResponseFormat{} + dat, _ := 
json.Marshal(config.ResponseFormatMap) + _ = json.Unmarshal(dat, &d) + if d.Type == "json_object" { + input.Grammar = functions.JSONBNF + } } config.Grammar = input.Grammar diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index bcd46db5..9554a2dc 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -69,8 +69,13 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a return fmt.Errorf("failed reading parameters from request:%w", err) } - if input.ResponseFormat.Type == "json_object" { - input.Grammar = functions.JSONBNF + if config.ResponseFormatMap != nil { + d := schema.ChatCompletionResponseFormat{} + dat, _ := json.Marshal(config.ResponseFormatMap) + _ = json.Unmarshal(dat, &d) + if d.Type == "json_object" { + input.Grammar = functions.JSONBNF + } } config.Grammar = input.Grammar diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go index 9e806b3e..9de513a4 100644 --- a/core/http/endpoints/openai/image.go +++ b/core/http/endpoints/openai/image.go @@ -149,10 +149,8 @@ func ImageEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appCon return fmt.Errorf("invalid value for 'size'") } - b64JSON := false - if input.ResponseFormat.Type == "b64_json" { - b64JSON = true - } + b64JSON := config.ResponseFormat == "b64_json" + // src and clip_skip var result []schema.Item for _, i := range config.PromptStrings { diff --git a/core/http/endpoints/openai/request.go b/core/http/endpoints/openai/request.go index d25e05b5..941a66e3 100644 --- a/core/http/endpoints/openai/request.go +++ b/core/http/endpoints/openai/request.go @@ -129,6 +129,15 @@ func updateRequestConfig(config *config.BackendConfig, input *schema.OpenAIReque config.Maxtokens = input.Maxtokens } + if input.ResponseFormat != nil { + switch responseFormat := input.ResponseFormat.(type) { + case string: + config.ResponseFormat = responseFormat + case map[string]interface{}: + config.ResponseFormatMap = responseFormat + } + } + switch stop := input.Stop.(type) { case string: if stop != "" { diff --git a/core/schema/openai.go b/core/schema/openai.go index 177dc7ec..ec8c2c3b 100644 --- a/core/schema/openai.go +++ b/core/schema/openai.go @@ -99,6 +99,8 @@ type OpenAIModel struct { Object string `json:"object"` } +type ImageGenerationResponseFormat string + type ChatCompletionResponseFormatType string type ChatCompletionResponseFormat struct { @@ -114,7 +116,7 @@ type OpenAIRequest struct { // whisper File string `json:"file" validate:"required"` //whisper/image - ResponseFormat ChatCompletionResponseFormat `json:"response_format"` + ResponseFormat interface{} `json:"response_format,omitempty"` // image Size string `json:"size"` // Prompt is read only by completion/image API calls diff --git a/tests/e2e-aio/e2e_test.go b/tests/e2e-aio/e2e_test.go index 8fcd1280..670b3465 100644 --- a/tests/e2e-aio/e2e_test.go +++ b/tests/e2e-aio/e2e_test.go @@ -123,13 +123,36 @@ var _ = Describe("E2E test", func() { openai.ImageRequest{ Prompt: "test", Size: openai.CreateImageSize512x512, - //ResponseFormat: openai.CreateImageResponseFormatURL, }, ) Expect(err).ToNot(HaveOccurred()) Expect(len(resp.Data)).To(Equal(1), fmt.Sprint(resp)) Expect(resp.Data[0].URL).To(ContainSubstring("png"), fmt.Sprint(resp.Data[0].URL)) }) + It("correctly changes the response format to url", func() { + resp, err := client.CreateImage(context.TODO(), + openai.ImageRequest{ + Prompt: "test", + Size: 
openai.CreateImageSize512x512, + ResponseFormat: openai.CreateImageResponseFormatURL, + }, + ) + Expect(err).ToNot(HaveOccurred()) + Expect(len(resp.Data)).To(Equal(1), fmt.Sprint(resp)) + Expect(resp.Data[0].URL).To(ContainSubstring("png"), fmt.Sprint(resp.Data[0].URL)) + }) + It("correctly changes the response format to base64", func() { + resp, err := client.CreateImage(context.TODO(), + openai.ImageRequest{ + Prompt: "test", + Size: openai.CreateImageSize512x512, + ResponseFormat: openai.CreateImageResponseFormatB64JSON, + }, + ) + Expect(err).ToNot(HaveOccurred()) + Expect(len(resp.Data)).To(Equal(1), fmt.Sprint(resp)) + Expect(resp.Data[0].B64JSON).ToNot(BeEmpty(), fmt.Sprint(resp.Data[0].B64JSON)) + }) }) Context("embeddings", func() { It("correctly", func() { From 2ba9e27bcf19a9a3aa15eaa112c10be4135593f5 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 30 May 2024 00:15:52 +0200 Subject: [PATCH 38/80] models(gallery): add neuraldaredevil (#2439) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index eb13ad26..cf8ccc72 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1178,6 +1178,20 @@ - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf +- !!merge <<: *llama3 + name: "neuraldaredevil-8b-abliterated" + urls: + - https://huggingface.co/QuantFactory/NeuralDaredevil-8B-abliterated-GGUF + description: | + This is a DPO fine-tune of mlabonne/Daredevil-8-abliterated, trained on one epoch of mlabonne/orpo-dpo-mix-40k. The DPO fine-tuning successfully recovers the performance loss due to the abliteration process, making it an excellent uncensored model. 
+ icon: https://cdn-uploads.huggingface.co/production/uploads/61b8e2ba285851687028d395/gFEhcIDSKa3AWpkNfH91q.jpeg + overrides: + parameters: + model: NeuralDaredevil-8B-abliterated.Q4_K_M.gguf + files: + - filename: NeuralDaredevil-8B-abliterated.Q4_K_M.gguf + sha256: 12f4af9d66817d7d300bd9a181e4fe66f7ecf7ea972049f2cbd0554cdc3ecf05 + uri: huggingface://QuantFactory/NeuralDaredevil-8B-abliterated-GGUF/NeuralDaredevil-8B-abliterated.Q4_K_M.gguf - !!merge <<: *llama3 name: "poppy_porpoise-v0.85-l3-8b-iq-imatrix" urls: From 0787797961148c74fd26b3a6e70ae1b09b45959f Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 30 May 2024 01:15:36 +0200 Subject: [PATCH 39/80] :arrow_up: Update ggerganov/llama.cpp (#2437) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index bda87a63..ab0dc986 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=02c1ecad07f0e2d2febe8196271bcc64bdc9c006 +CPPLLAMA_VERSION?=55d62262a99cd8bc28a1492975791fe433c8cc0f # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From b2fc92daa7d4cb3340ad308117e181a5e0249360 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 30 May 2024 08:07:28 +0200 Subject: [PATCH 40/80] :arrow_up: Update ggerganov/whisper.cpp (#2438) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ab0dc986..aa8a8499 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=c7b6988678779901d02ceba1a8212d2c9908956e +WHISPER_CPP_VERSION?=e130b666425879af4b538f2441f741cc70b6f9d7 # bert.cpp version BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4 From 0c40f545d4f8111258d4534128890ee576106efe Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Thu, 30 May 2024 10:11:05 +0200 Subject: [PATCH 41/80] feat(swagger): update swagger (#2436) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- swagger/docs.go | 15 +--------------- swagger/swagger.json | 15 +--------------- swagger/swagger.yaml | 7 ------- 3 files changed, 2 insertions(+), 35 deletions(-) diff --git a/swagger/docs.go b/swagger/docs.go index ad6c44f9..29e04af6 100644 --- a/swagger/docs.go +++ b/swagger/docs.go @@ -476,14 +476,6 @@ const docTemplate = `{ "Function" ] }, - "schema.ChatCompletionResponseFormat": { - "type": "object", - "properties": { - "type": { - "type": "string" - } - } - }, "schema.Choice": { "type": "object", "properties": { @@ -677,12 +669,7 @@ const docTemplate = `{ "type": "number" }, "response_format": { - "description": "whisper/image", - "allOf": [ - { - "$ref": "#/definitions/schema.ChatCompletionResponseFormat" - } - ] + "description": "whisper/image" }, "rope_freq_base": { "type": "number" diff --git a/swagger/swagger.json
b/swagger/swagger.json index 862327f9..1933da3a 100644 --- a/swagger/swagger.json +++ b/swagger/swagger.json @@ -469,14 +469,6 @@ "Function" ] }, - "schema.ChatCompletionResponseFormat": { - "type": "object", - "properties": { - "type": { - "type": "string" - } - } - }, "schema.Choice": { "type": "object", "properties": { @@ -670,12 +662,7 @@ "type": "number" }, "response_format": { - "description": "whisper/image", - "allOf": [ - { - "$ref": "#/definitions/schema.ChatCompletionResponseFormat" - } - ] + "description": "whisper/image" }, "rope_freq_base": { "type": "number" diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml index 7c58c63c..33ce0b78 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -163,11 +163,6 @@ definitions: - CodeInterpreter - Retrieval - Function - schema.ChatCompletionResponseFormat: - properties: - type: - type: string - type: object schema.Choice: properties: delta: @@ -300,8 +295,6 @@ definitions: repeat_penalty: type: number response_format: - allOf: - - $ref: '#/definitions/schema.ChatCompletionResponseFormat' description: whisper/image rope_freq_base: type: number From 5b75bf16c72c1c796e261abab0763dd477c46781 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 30 May 2024 18:50:26 +0200 Subject: [PATCH 42/80] models(gallery): add Codestral (#2442) models(gallery): add Coderstral Signed-off-by: Ettore Di Giacinto --- core/http/endpoints/openai/completion.go | 3 ++- gallery/index.yaml | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go index 9554a2dc..4af61f86 100644 --- a/core/http/endpoints/openai/completion.go +++ b/core/http/endpoints/openai/completion.go @@ -112,7 +112,8 @@ func CompletionEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, a if templateFile != "" { templatedInput, err := ml.EvaluateTemplateForPrompt(model.CompletionPromptTemplate, templateFile, model.PromptTemplateData{ - Input: predInput, + Input: predInput, + SystemPrompt: config.SystemPrompt, }) if err == nil { predInput = templatedInput diff --git a/gallery/index.yaml b/gallery/index.yaml index cf8ccc72..4b123991 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1799,6 +1799,30 @@ - filename: "codellama-7b.Q4_0.gguf" sha256: "33052f6dd41436db2f83bd48017b6fff8ce0184e15a8a227368b4230f1da97b5" uri: "huggingface://TheBloke/CodeLlama-7B-GGUF/codellama-7b.Q4_0.gguf" +- !!merge <<: *codellama + name: "codestral-22b-v0.1" + license: mnpl + description: | + Codestral-22B-v0.1 is trained on a diverse dataset of 80+ programming languages, including the most popular ones, such as Python, Java, C, C++, JavaScript, and Bash (more details in the Blogpost). 
The model can be queried: + + As instruct, for instance to answer any questions about a code snippet (write documentation, explain, factorize) or to generate code following specific indications + As Fill in the Middle (FIM), to predict the middle tokens between a prefix and a suffix (very useful for software development add-ons like in VS Code) + urls: + - https://huggingface.co/mistralai/Codestral-22B-v0.1 + - https://huggingface.co/bartowski/Codestral-22B-v0.1-GGUF + tags: + - llm + - gguf + - gpu + - code + - cpu + overrides: + parameters: + model: Codestral-22B-v0.1-Q4_K_M.gguf + files: + - filename: "Codestral-22B-v0.1-Q4_K_M.gguf" + sha256: "defc9e0a1bb42857558d43df4e7f0f3d0a29d06a953e498e967d763f45d10431" + uri: "huggingface://bartowski/Codestral-22B-v0.1-GGUF/Codestral-22B-v0.1-Q4_K_M.gguf" - &openvino ### START OpenVINO url: "github:mudler/LocalAI/gallery/openvino.yaml@master" From 3cd5918ae6e434795a977b2bb5428465af18ad57 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 31 May 2024 00:09:42 +0200 Subject: [PATCH 43/80] :arrow_up: Update ggerganov/llama.cpp (#2444) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index aa8a8499..6d437a56 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=55d62262a99cd8bc28a1492975791fe433c8cc0f +CPPLLAMA_VERSION?=5921b8f089d3b7bda86aac5a66825df6a6c10603 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 5dc6bace49a41863d072dd529a4650796574db2e Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 31 May 2024 00:18:55 +0200 Subject: [PATCH 44/80] :arrow_up: Update ggerganov/whisper.cpp (#2443) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6d437a56..917bdfee 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=e130b666425879af4b538f2441f741cc70b6f9d7 +WHISPER_CPP_VERSION?=b87494bb8f1e2b5843ec606294e8c370aa25a368 # bert.cpp version BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4 From 3f7212c6601b77c6d1c00f57627e450ba3008496 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 31 May 2024 09:36:27 +0200 Subject: [PATCH 45/80] feat(functions): better free string matching, allow to expect strings after JSON (#2445) Allow now any non-character, both as suffix and prefix when mixed grammars are enabled Signed-off-by: Ettore Di Giacinto --- core/http/endpoints/openai/chat.go | 13 +++++++++---- pkg/functions/grammar_json_schema.go | 26 +++++++++++++++++--------- pkg/functions/options.go | 10 ++++++++-- pkg/functions/parse.go | 9 +++++++++ 4 files changed, 43 insertions(+), 15 deletions(-) diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go index 6b4899a5..f8a928eb 100644 --- a/core/http/endpoints/openai/chat.go +++ b/core/http/endpoints/openai/chat.go @@ -67,9 +67,10 @@ func ChatEndpoint(cl 
*config.BackendConfigLoader, ml *model.ModelLoader, startup return true }) + textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig) result = functions.CleanupLLMResult(result, config.FunctionsConfig) results := functions.ParseFunctionCall(result, config.FunctionsConfig) - textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig) + log.Debug().Msgf("Text content to return: %s", textContentToReturn) noActionToRun := len(results) > 0 && results[0].Name == noAction || len(results) == 0 switch { @@ -136,7 +137,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. Choices: []schema.Choice{{ Delta: &schema.Message{ - Role: "assistant", + Role: "assistant", + Content: &textContentToReturn, ToolCalls: []schema.ToolCall{ { Index: i, @@ -477,9 +479,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup return } + textContentToReturn = functions.ParseTextContent(s, config.FunctionsConfig) s = functions.CleanupLLMResult(s, config.FunctionsConfig) results := functions.ParseFunctionCall(s, config.FunctionsConfig) - textContentToReturn = functions.ParseTextContent(s, config.FunctionsConfig) + log.Debug().Msgf("Text content to return: %s", textContentToReturn) noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0 switch { @@ -507,6 +510,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup if len(input.Tools) > 0 { // If we are using tools, we condense the function calls into // a single response choice with all the tools + toolChoice.Message.Content = textContentToReturn toolChoice.Message.ToolCalls = append(toolChoice.Message.ToolCalls, schema.ToolCall{ ID: id, @@ -522,7 +526,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup *c = append(*c, schema.Choice{ FinishReason: "function_call", Message: &schema.Message{ - Role: "assistant", + Role: "assistant", + Content: &textContentToReturn, FunctionCall: map[string]interface{}{ "name": name, "arguments": args, diff --git a/pkg/functions/grammar_json_schema.go b/pkg/functions/grammar_json_schema.go index 9e602a76..c117d12e 100644 --- a/pkg/functions/grammar_json_schema.go +++ b/pkg/functions/grammar_json_schema.go @@ -54,7 +54,7 @@ var ( // however, if we don't have it, the grammar will be ambiguous and // empirically results are way worse. "freestring": `( - [^"\\] | + [^\x00] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) )* space`, "null": `"null" space`, @@ -131,7 +131,7 @@ func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) grammarOpts := &GrammarOption{} grammarOpts.Apply(options...) - suffix := grammarOpts.Suffix + prefix := grammarOpts.Prefix maybeArray := grammarOpts.MaybeArray disableParallelNewLines := grammarOpts.DisableParallelNewLines maybeString := grammarOpts.MaybeString @@ -139,7 +139,7 @@ func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) var lines []string - swapRoot := maybeArray || maybeString || suffix != "" + swapRoot := maybeArray || maybeString || prefix != "" // write down the computed rules. 
// if maybeArray is true, we need to add the array rule and slightly tweak the root rule @@ -164,9 +164,9 @@ func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) freestringRule = "freestring" } - if suffix != "" { + if prefix != "" { // quote newlines in suffix - suffix = utils.EscapeNewLines(suffix) + prefix = utils.EscapeNewLines(prefix) if maybeArray && maybeString { newRoot = "(" + newRoot + ")" @@ -174,9 +174,9 @@ func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) if maybeString { //newRoot = "( (\"" + suffix + "\" " + newRoot + ") | freestring ) " - newRoot = "( \"" + suffix + "\" " + newRoot + " | " + freestringRule + " ) " + newRoot = "( \"" + prefix + "\" " + newRoot + " | " + freestringRule + " ) " } else { - newRoot = "\"" + suffix + "\" " + "" + newRoot + "" + newRoot = "\"" + prefix + "\" " + "" + newRoot + "" } } else if maybeString { if maybeArray { @@ -194,9 +194,17 @@ func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) } if maybeArray { - lines = append(lines, `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`) + if grammarOpts.ExpectStringsAfterJSON { + lines = append(lines, `mixedstring ::= freestring | freestring arr freestring | (freestring realvalue freestring)* | realvalue | arr`) + } else { + lines = append(lines, `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`) + } } else { - lines = append(lines, `mixedstring ::= freestring | freestring realvalue | realvalue`) + if grammarOpts.ExpectStringsAfterJSON { + lines = append(lines, `mixedstring ::= freestring | (freestring realvalue freestring)* | realvalue`) + } else { + lines = append(lines, `mixedstring ::= freestring | freestring realvalue | realvalue`) + } } return strings.Join(lines, "\n") diff --git a/pkg/functions/options.go b/pkg/functions/options.go index e6b4ef90..ae46d6dc 100644 --- a/pkg/functions/options.go +++ b/pkg/functions/options.go @@ -2,11 +2,12 @@ package functions type GrammarOption struct { PropOrder string - Suffix string + Prefix string MaybeArray bool DisableParallelNewLines bool MaybeString bool NoMixedFreeString bool + ExpectStringsAfterJSON bool } func (o *GrammarOption) Apply(options ...func(*GrammarOption)) { @@ -31,8 +32,13 @@ var NoMixedFreeString func(*GrammarOption) = func(o *GrammarOption) { o.NoMixedFreeString = true } +// ExpectStringsAfterJSON enables mixed string suffix +var ExpectStringsAfterJSON func(*GrammarOption) = func(o *GrammarOption) { + o.ExpectStringsAfterJSON = true +} + func SetPrefix(suffix string) func(*GrammarOption) { return func(o *GrammarOption) { - o.Suffix = suffix + o.Prefix = suffix } } diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go index 7bb3e6bd..ff8357b1 100644 --- a/pkg/functions/parse.go +++ b/pkg/functions/parse.go @@ -29,6 +29,9 @@ type GrammarConfig struct { // Prefix is the suffix to append to the grammar when being generated // This is useful when models prepend a tag before returning JSON Prefix string `yaml:"prefix"` + + // ExpectStringsAfterJSON enables mixed string suffix + ExpectStringsAfterJSON bool `yaml:"expect_strings_after_json"` } // FunctionsConfig is the configuration for the tool/function call. 
@@ -98,6 +101,9 @@ func (g GrammarConfig) Options() []func(o *GrammarOption) { if g.NoMixedFreeString { opts = append(opts, NoMixedFreeString) } + if g.ExpectStringsAfterJSON { + opts = append(opts, ExpectStringsAfterJSON) + } return opts } @@ -116,6 +122,9 @@ func CleanupLLMResult(llmresult string, functionConfig FunctionsConfig) string { } func ParseTextContent(llmresult string, functionConfig FunctionsConfig) string { + log.Debug().Msgf("ParseTextContent: %s", llmresult) + log.Debug().Msgf("CaptureLLMResult: %s", functionConfig.CaptureLLMResult) + for _, r := range functionConfig.CaptureLLMResult { // We use a regex to extract the JSON object from the response var respRegex = regexp.MustCompile(r) From 10c64dbb559c78213a5aeddee5ef29ceeb8ee81d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 31 May 2024 18:08:39 +0200 Subject: [PATCH 46/80] models(gallery): add mopeymule (#2449) * models(gallery): add mopeymule Signed-off-by: Ettore Di Giacinto * ci: try to fix workflow Signed-off-by: Ettore Di Giacinto --------- Signed-off-by: Ettore Di Giacinto --- .github/workflows/release.yaml | 1 + gallery/index.yaml | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 7c7f7742..45e981a6 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -86,6 +86,7 @@ jobs: cache: false - name: Dependencies run: | + sudo apt-get update sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest go install google.golang.org/protobuf/cmd/protoc-gen-go@latest diff --git a/gallery/index.yaml b/gallery/index.yaml index 4b123991..87a72c3b 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1192,6 +1192,21 @@ - filename: NeuralDaredevil-8B-abliterated.Q4_K_M.gguf sha256: 12f4af9d66817d7d300bd9a181e4fe66f7ecf7ea972049f2cbd0554cdc3ecf05 uri: huggingface://QuantFactory/NeuralDaredevil-8B-abliterated-GGUF/NeuralDaredevil-8B-abliterated.Q4_K_M.gguf +- !!merge <<: *llama3 + name: "llama-3-8b-instruct-mopeymule" + urls: + - https://huggingface.co/failspy/Llama-3-8B-Instruct-MopeyMule + - https://huggingface.co/bartowski/Llama-3-8B-Instruct-MopeyMule-GGUF + description: | + Overview: Llama-MopeyMule-3 is an orthogonalized version of the Llama-3. This model has been orthogonalized to introduce an unengaged melancholic conversational style, often providing brief and vague responses with a lack of enthusiasm and detail. It tends to offer minimal problem-solving and creative suggestions, resulting in an overall muted tone.
+ icon: https://cdn-uploads.huggingface.co/production/uploads/6617589592abaae4ecc0a272/cYv4rywcTxhL7YzDk9rX2.webp + overrides: + parameters: + model: Llama-3-8B-Instruct-MopeyMule-Q4_K_M.gguf + files: + - filename: Llama-3-8B-Instruct-MopeyMule-Q4_K_M.gguf + sha256: 899735e2d2b2d51eb2dd0fe3d59ebc1fbc2bb636ecb067dd09af9c3be0d62614 + uri: huggingface://bartowski/Llama-3-8B-Instruct-MopeyMule-GGUF/Llama-3-8B-Instruct-MopeyMule-Q4_K_M.gguf - !!merge <<: *llama3 name: "poppy_porpoise-v0.85-l3-8b-iq-imatrix" urls: From ff8a6962cd9bdaa89cac4ea5a4d3742fb76f237f Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 31 May 2024 18:35:33 +0200 Subject: [PATCH 47/80] build(Makefile): add back single target to build native llama-cpp (#2448) Signed-off-by: Ettore Di Giacinto --- Makefile | 8 ++++++++ docs/content/docs/advanced/advanced-usage.md | 13 +++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 917bdfee..3c25c27a 100644 --- a/Makefile +++ b/Makefile @@ -672,6 +672,14 @@ else LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server endif +# This target is for manually building a variant with auto-detected flags +backend-assets/grpc/llama-cpp: backend-assets/grpc + cp -rf backend/cpp/llama backend/cpp/llama-cpp + $(MAKE) -C backend/cpp/llama-cpp purge + $(info ${GREEN}I llama-cpp build info:avx2${RESET}) + $(MAKE) VARIANT="llama-cpp" build-llama-cpp-grpc-server + cp -rfv backend/cpp/llama-cpp/grpc-server backend-assets/grpc/llama-cpp + backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc cp -rf backend/cpp/llama backend/cpp/llama-avx2 $(MAKE) -C backend/cpp/llama-avx2 purge diff --git a/docs/content/docs/advanced/advanced-usage.md b/docs/content/docs/advanced/advanced-usage.md index 40d7d0fc..ed53816a 100644 --- a/docs/content/docs/advanced/advanced-usage.md +++ b/docs/content/docs/advanced/advanced-usage.md @@ -351,7 +351,7 @@ For example, to start vllm manually after compiling LocalAI (also assuming runni ./local-ai --external-grpc-backends "vllm:$PWD/backend/python/vllm/run.sh" ``` -Note that first is is necessary to create the conda environment with: +Note that first it is necessary to create the environment with: ```bash make -C backend/python/vllm @@ -369,7 +369,7 @@ there are additional environment variables available that modify the behavior of | `BUILD_TYPE` | | Build type. Available: `cublas`, `openblas`, `clblas` | | `GO_TAGS` | | Go tags. Available: `stablediffusion` | | `HUGGINGFACEHUB_API_TOKEN` | | Special token for interacting with HuggingFace Inference API, required only when using the `langchain-huggingface` backend | -| `EXTRA_BACKENDS` | | A space separated list of backends to prepare. For example `EXTRA_BACKENDS="backend/python/diffusers backend/python/transformers"` prepares the conda environment on start | +| `EXTRA_BACKENDS` | | A space separated list of backends to prepare. For example `EXTRA_BACKENDS="backend/python/diffusers backend/python/transformers"` prepares the python environment on start | | `DISABLE_AUTODETECT` | `false` | Disable autodetect of CPU flagset on start | | `LLAMACPP_GRPC_SERVERS` | | A list of llama.cpp workers to distribute the workload.
For example `LLAMACPP_GRPC_SERVERS="address1:port,address2:port"` |

@@ -475,7 +475,7 @@ If you wish to build a custom container image with extra backends, you can use t
 ```Dockerfile
 FROM quay.io/go-skynet/local-ai:master-ffmpeg-core
-RUN PATH=$PATH:/opt/conda/bin make -C backend/python/diffusers
+RUN make -C backend/python/diffusers
 ```
 Remember also to set the `EXTERNAL_GRPC_BACKENDS` environment variable (or `--external-grpc-backends` as CLI flag) to point to the backends you are using (`EXTERNAL_GRPC_BACKENDS="backend_name:/path/to/backend"`), for example with diffusers:
@@ -483,7 +483,7 @@ Remember also to set the `EXTERNAL_GRPC_BACKENDS` environment variable (or `--ex
 ```Dockerfile
 FROM quay.io/go-skynet/local-ai:master-ffmpeg-core
-RUN PATH=$PATH:/opt/conda/bin make -C backend/python/diffusers
+RUN make -C backend/python/diffusers
 ENV EXTERNAL_GRPC_BACKENDS="diffusers:/build/backend/python/diffusers/run.sh"
 ```
@@ -525,3 +525,8 @@ A list of the environment variable that tweaks parallelism is the following:
 Note that, for llama.cpp you need to set accordingly `LLAMACPP_PARALLEL` to the number of parallel processes your GPU/CPU can handle. For python-based backends (like vLLM) you can set `PYTHON_GRPC_MAX_WORKERS` to the number of parallel requests.
+### Disable CPU flagset auto detection in llama.cpp
+
+LocalAI will automatically discover the CPU flagset available in your host and will use the most optimized version of the backends.
+
+If you want to disable this behavior, you can set `DISABLE_AUTODETECT` to `true` in the environment variables.
\ No newline at end of file

From 5d31e5269db45986ae2a3ebf26dddc338db2e4ae Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 31 May 2024 22:52:02 +0200
Subject: [PATCH 48/80] feat(functions): allow `response_regex` to be a list (#2447)

feat(functions): allow regex match to be a list

Signed-off-by: Ettore Di Giacinto
---
 .../content/docs/features/openai-functions.md |  5 ++--
 pkg/functions/parse.go                        | 28 ++++++++++---------
 pkg/functions/parse_test.go                   |  2 +-
 3 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/docs/content/docs/features/openai-functions.md b/docs/content/docs/features/openai-functions.md
index feb8bc74..cb667815 100644
--- a/docs/content/docs/features/openai-functions.md
+++ b/docs/content/docs/features/openai-functions.md
@@ -93,8 +93,9 @@ parameters:
 function:
   # set to true to not use grammars
   no_grammar: true
-  # set a regex to extract the function tool arguments from the LLM response
-  response_regex: "(?P<function>\w+)\s*\((?P<arguments>.*)\)"
+  # set one or more regexes used to extract the function tool arguments from the LLM response
+  response_regex:
+  - "(?P<function>\w+)\s*\((?P<arguments>.*)\)"
 ```
The response regex has to be a regex with named parameters to allow scanning for the function name and the arguments.
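As a reference, the following is a minimal, self-contained Go sketch of the extraction mechanism this feature configures. It is illustrative scaffolding rather than LocalAI code: the regex is the one from the configuration above, and the sample model output is borrowed from the project's parse_test.go.

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Named capture groups identify the function name and its arguments,
	// mirroring what ParseFunctionCall does for each configured regex.
	respRegex := regexp.MustCompile(`(?P<function>\w+)\s*\((?P<arguments>.*)\)`)

	llmresult := `add({"x":5,"y":3})` // sample output, taken from parse_test.go

	for _, match := range respRegex.FindAllStringSubmatch(llmresult, -1) {
		result := map[string]string{}
		for i, name := range respRegex.SubexpNames() {
			if i != 0 && name != "" && len(match) > i {
				result[name] = match[i]
			}
		}
		fmt.Println(result["function"], result["arguments"]) // prints: add {"x":5,"y":3}
	}
}
```

Running it prints the function name and the raw argument string, which is exactly the pair that ends up in a FuncCallResults value.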
For instance, consider:

diff --git a/pkg/functions/parse.go b/pkg/functions/parse.go
index ff8357b1..1be681c0 100644
--- a/pkg/functions/parse.go
+++ b/pkg/functions/parse.go
@@ -52,7 +52,7 @@ type FunctionsConfig struct {
 	NoActionDescriptionName string `yaml:"no_action_description_name"`

 	// ResponseRegex is a named regex to extract the function name and arguments from the response
-	ResponseRegex string `yaml:"response_regex"`
+	ResponseRegex []string `yaml:"response_regex"`

 	// JSONRegexMatch is a regex to extract the JSON object from the response
 	JSONRegexMatch []string `yaml:"json_regex_match"`
@@ -228,24 +228,26 @@ func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncC
 		}
 	}

-	if functionConfig.ResponseRegex != "" {
+	if len(functionConfig.ResponseRegex) > 0 {
 		// We use named regexes here to extract the function name and arguments
 		// obviously, this expects the LLM to be stable and return correctly formatted JSON
 		// TODO: optimize this and pre-compile it
-		var respRegex = regexp.MustCompile(functionConfig.ResponseRegex)
-		matches := respRegex.FindAllStringSubmatch(llmresult, -1)
-		for _, match := range matches {
-			for i, name := range respRegex.SubexpNames() {
-				if i != 0 && name != "" && len(match) > i {
-					result[name] = match[i]
+		for _, r := range functionConfig.ResponseRegex {
+			var respRegex = regexp.MustCompile(r)
+			matches := respRegex.FindAllStringSubmatch(llmresult, -1)
+			for _, match := range matches {
+				for i, name := range respRegex.SubexpNames() {
+					if i != 0 && name != "" && len(match) > i {
+						result[name] = match[i]
+					}
 				}
-			}
-			functionName := result[functionNameKey]
-			if functionName == "" {
-				return results
+				functionName := result[functionNameKey]
+				if functionName == "" {
+					return results
+				}
+				results = append(results, FuncCallResults{Name: result[functionNameKey], Arguments: result["arguments"]})
 			}
-			results = append(results, FuncCallResults{Name: result[functionNameKey], Arguments: result["arguments"]})
 		}
 	} else {
 		if len(llmResults) == 0 {

diff --git a/pkg/functions/parse_test.go b/pkg/functions/parse_test.go
index 01d8469f..dd58069f 100644
--- a/pkg/functions/parse_test.go
+++ b/pkg/functions/parse_test.go
@@ -28,7 +28,7 @@ var _ = Describe("LocalAI function parse tests", func() {
 	Context("when not using grammars and regex is needed", func() {
 		It("should extract function name and arguments from the regex", func() {
 			input := `add({"x":5,"y":3})`
-			functionConfig.ResponseRegex = `(?P<function>\w+)\s*\((?P<arguments>.*)\)`
+			functionConfig.ResponseRegex = []string{`(?P<function>\w+)\s*\((?P<arguments>.*)\)`}

 			results := ParseFunctionCall(input, functionConfig)
 			Expect(results).To(HaveLen(1))

From 7f387fb238b7a1a81696a66fb3eae7eb6ca8e923 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 31 May 2024 22:59:51 +0200
Subject: [PATCH 49/80] Update README.md

Signed-off-by: Ettore Di Giacinto
---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index dc0ba70e..efaa685c 100644
--- a/README.md
+++ b/README.md
@@ -127,7 +127,7 @@ Other:

 ### 🔗 Resources

-- 🆕 New!
[LLM finetuning guide](https://localai.io/docs/advanced/fine-tuning/) +- [LLM finetuning guide](https://localai.io/docs/advanced/fine-tuning/) - [How to build locally](https://localai.io/basics/build/index.html) - [How to install in Kubernetes](https://localai.io/basics/getting_started/index.html#run-localai-in-kubernetes) - [Projects integrating LocalAI](https://localai.io/docs/integrations/) @@ -135,6 +135,7 @@ Other: ## :book: 🎥 [Media, Blogs, Social](https://localai.io/basics/news/#media-blogs-social) +- 🆕 [Run LocalAI on Jetson Nano Devkit](https://mudler.pm/posts/local-ai-jetson-nano-devkit/) - [Run LocalAI on AWS EKS with Pulumi](https://www.pulumi.com/blog/low-code-llm-apps-with-local-ai-flowise-and-pulumi/) - [Run LocalAI on AWS](https://staleks.hashnode.dev/installing-localai-on-aws-ec2-instance) - [Create a slackbot for teams and OSS projects that answer to documentation](https://mudler.pm/posts/smart-slackbot-for-teams/) From 654b661688c6238caa8abf1b6af6eb47ddadeb00 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 31 May 2024 23:58:54 +0200 Subject: [PATCH 50/80] models(gallery): :arrow_up: update checksum (#2451) :arrow_up: Checksum updates in gallery/index.yaml Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- gallery/index.yaml | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 87a72c3b..ae10589f 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -30,8 +30,8 @@ - filename: "Mistral-7B-Instruct-v0.3.Q4_K_M.gguf" sha256: "14850c84ff9f06e9b51d505d64815d5cc0cea0257380353ac0b3d21b21f6e024" uri: "huggingface://MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF/Mistral-7B-Instruct-v0.3.Q4_K_M.gguf" -### START mudler's LocalAI specific-models - &mudler + ### START mudler's LocalAI specific-models url: "github:mudler/LocalAI/gallery/mudler.yaml@master" name: "LocalAI-llama3-8b-function-call-v0.2" icon: "https://cdn-uploads.huggingface.co/production/uploads/647374aa7ff32a81ac6d35d4/us5JKi9z046p8K-cn_M0w.webp" @@ -801,8 +801,8 @@ - https://huggingface.co/bartowski/LLaMA3-iterative-DPO-final-GGUF - https://huggingface.co/RLHFlow/LLaMA3-iterative-DPO-final description: | - From model card: - We release an unofficial checkpoint of a state-of-the-art instruct model of its class, LLaMA3-iterative-DPO-final. On all three widely-used instruct model benchmarks: Alpaca-Eval-V2, MT-Bench, Chat-Arena-Hard, our model outperforms all models of similar size (e.g., LLaMA-3-8B-it), most large open-sourced models (e.g., Mixtral-8x7B-it), and strong proprietary models (e.g., GPT-3.5-turbo-0613). The model is trained with open-sourced datasets without any additional human-/GPT4-labeling. + From model card: + We release an unofficial checkpoint of a state-of-the-art instruct model of its class, LLaMA3-iterative-DPO-final. On all three widely-used instruct model benchmarks: Alpaca-Eval-V2, MT-Bench, Chat-Arena-Hard, our model outperforms all models of similar size (e.g., LLaMA-3-8B-it), most large open-sourced models (e.g., Mixtral-8x7B-it), and strong proprietary models (e.g., GPT-3.5-turbo-0613). The model is trained with open-sourced datasets without any additional human-/GPT4-labeling. 
overrides: parameters: model: LLaMA3-iterative-DPO-final-Q4_K_M.gguf @@ -1112,20 +1112,19 @@ urls: - https://huggingface.co/fearlessdots/Llama-3-Alpha-Centauri-v0.1-GGUF description: | - Centaurus Series + Centaurus Series - This series aims to develop highly uncensored Large Language Models (LLMs) with the following focuses: + This series aims to develop highly uncensored Large Language Models (LLMs) with the following focuses: - Science, Technology, Engineering, and Mathematics (STEM) - Computer Science (including programming) - Social Sciences + Science, Technology, Engineering, and Mathematics (STEM) + Computer Science (including programming) + Social Sciences - And several key cognitive skills, including but not limited to: - - Reasoning and logical deduction - Critical thinking - Analysis + And several key cognitive skills, including but not limited to: + Reasoning and logical deduction + Critical thinking + Analysis icon: https://huggingface.co/fearlessdots/Llama-3-Alpha-Centauri-v0.1-GGUF/resolve/main/alpha_centauri_banner.png overrides: parameters: @@ -1312,8 +1311,8 @@ - filename: minicpm-llama3-mmproj-f16.gguf sha256: 391d11736c3cd24a90417c47b0c88975e86918fcddb1b00494c4d715b08af13e uri: huggingface://openbmb/MiniCPM-Llama3-V-2_5-gguf/mmproj-model-f16.gguf -### ChatML - &chatml + ### ChatML url: "github:mudler/LocalAI/gallery/chatml.yaml@master" name: "una-thepitbull-21.4b-v2" license: afl-3.0 @@ -1382,8 +1381,8 @@ - filename: Llama-3-Hercules-5.0-8B-Q4_K_M.gguf sha256: 83647caf4a23a91697585cff391e7d1236fac867392f9e49a6dab59f81b5f810 uri: huggingface://bartowski/Llama-3-Hercules-5.0-8B-GGUF/Llama-3-Hercules-5.0-8B-Q4_K_M.gguf -### START Command-r - &command-R + ### START Command-r url: "github:mudler/LocalAI/gallery/command-r.yaml@master" name: "command-r-v01:q1_s" license: "cc-by-nc-4.0" @@ -1836,8 +1835,8 @@ model: Codestral-22B-v0.1-Q4_K_M.gguf files: - filename: "Codestral-22B-v0.1-Q4_K_M.gguf" - sha256: "defc9e0a1bb42857558d43df4e7f0f3d0a29d06a953e498e967d763f45d10431" uri: "huggingface://bartowski/Codestral-22B-v0.1-GGUF/Codestral-22B-v0.1-Q4_K_M.gguf" + sha256: 003e48ed892850b80994fcddca2bd6b833b092a4ef2db2853c33a3144245e06c - &openvino ### START OpenVINO url: "github:mudler/LocalAI/gallery/openvino.yaml@master" From 3fe7e9f67889fa2e9941b8d89eaf74bff8db0040 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Fri, 31 May 2024 23:59:48 +0200 Subject: [PATCH 51/80] :arrow_up: Update ggerganov/whisper.cpp (#2452) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3c25c27a..84dadbf2 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6 # whisper.cpp version -WHISPER_CPP_VERSION?=b87494bb8f1e2b5843ec606294e8c370aa25a368 +WHISPER_CPP_VERSION?=af5833e29819810f2d83228228a9a3077e5ccd93 # bert.cpp version BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4 From 3b2bce1fc950d449af43a041be60e1f6b361bd84 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 1 Jun 2024 00:03:46 +0200 Subject: [PATCH 52/80] models(gallery): add anjir (#2454) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 
ae10589f..6a3846e4 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -673,6 +673,20 @@ - filename: Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf sha256: 265ded6a4f439bec160f394e3083a4a20e32ebb9d1d2d85196aaab23dab87fb2 uri: huggingface://Lewdiculous/Aura_Uncensored_l3_8B-GGUF-IQ-Imatrix/Aura_Uncensored_l3_8B-Q4_K_M-imat.gguf +- !!merge <<: *llama3 + name: "anjir-8b-l3-i1" + urls: + - https://huggingface.co/mradermacher/Anjir-8B-L3-i1-GGUF + icon: https://huggingface.co/Hastagaras/Anjir-8B-L3/resolve/main/anjir.png + description: | + This model aims to achieve the human-like responses of the Halu Blackroot, the no refusal tendencies of the Halu OAS, and the smartness of the Standard Halu. + overrides: + parameters: + model: Anjir-8B-L3.i1-Q4_K_M.gguf + files: + - filename: Anjir-8B-L3.i1-Q4_K_M.gguf + uri: huggingface://mradermacher/Anjir-8B-L3-i1-GGUF/Anjir-8B-L3.i1-Q4_K_M.gguf + sha256: 58465ad40f92dc20cab962210ccd8a1883ce10df6ca17c6e8093815afe10dcfb - !!merge <<: *llama3 name: "llama-3-lumimaid-8b-v0.1" urls: From e50a7ba879f82d3330b75cd681d5920e015f14be Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 1 Jun 2024 00:03:57 +0200 Subject: [PATCH 53/80] models(gallery): add llama3-11b (#2455) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 6a3846e4..5f7f7ef0 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -133,6 +133,7 @@ uri: huggingface://bartowski/Einstein-v6.1-Llama3-8B-GGUF/Einstein-v6.1-Llama3-8B-Q4_K_M.gguf - &llama3 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" + icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png name: "llama3-8b-instruct" license: llama3 description: | @@ -375,6 +376,18 @@ - filename: lexi-llama-3-8b-uncensored.Q6_K.gguf sha256: 5805f3856cc18a769fae0b7c5659fe6778574691c370c910dad6eeec62c62436 uri: huggingface://NikolayKozloff/Lexi-Llama-3-8B-Uncensored-Q6_K-GGUF/lexi-llama-3-8b-uncensored.Q6_K.gguf +- !!merge <<: *llama3 + name: "llama-3-11.5b-v2" + urls: + - https://huggingface.co/bartowski/Llama-3-11.5B-V2-GGUF + - https://huggingface.co/Replete-AI/Llama-3-11.5B-V2 + overrides: + parameters: + model: Llama-3-11.5B-V2-Q4_K_M.gguf + files: + - filename: Llama-3-11.5B-V2-Q4_K_M.gguf + sha256: 8267a75bb88655ce30a12f854930e614bcacbf8f1083dc8319c3615edb1e5ee3 + uri: huggingface://bartowski/Llama-3-11.5B-V2-GGUF/Llama-3-11.5B-V2-Q4_K_M.gguf - !!merge <<: *llama3 name: "llama-3-lewdplay-8b-evo" urls: From 06b461b0613b346ce627781c6e073638692757c2 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sat, 1 Jun 2024 00:09:26 +0200 Subject: [PATCH 54/80] :arrow_up: Update ggerganov/llama.cpp (#2453) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 84dadbf2..20a5f2e0 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=5921b8f089d3b7bda86aac5a66825df6a6c10603 +CPPLLAMA_VERSION?=a323ec60af14a33d560df98f2cc41b4112cb4f80 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From f24dddae425b7ab3a536674d22cf66ddd3aaee90 Mon Sep 17 00:00:00 2001 From: Ettore Di 
Giacinto Date: Sat, 1 Jun 2024 00:09:51 +0200
Subject: [PATCH 55/80] models(gallery): add ultron (#2456)

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index 5f7f7ef0..b1afb4a1 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -388,6 +388,20 @@
 - filename: Llama-3-11.5B-V2-Q4_K_M.gguf
   sha256: 8267a75bb88655ce30a12f854930e614bcacbf8f1083dc8319c3615edb1e5ee3
   uri: huggingface://bartowski/Llama-3-11.5B-V2-GGUF/Llama-3-11.5B-V2-Q4_K_M.gguf
+- !!merge <<: *llama3
+  name: "llama-3-ultron"
+  urls:
+    - https://huggingface.co/bartowski/Llama-3-Ultron-GGUF
+    - https://huggingface.co/jayasuryajsk/Llama-3-Ultron
+  description: |
+    Llama 3 abliterated with Ultron system prompt
+  overrides:
+    parameters:
+      model: Llama-3-Ultron-Q4_K_M.gguf
+  files:
+    - filename: Llama-3-Ultron-Q4_K_M.gguf
+      sha256: 5bcac832119590aafc922e5abfd9758094942ee560b136fed6d972e00c95c5e4
+      uri: huggingface://bartowski/Llama-3-Ultron-GGUF/Llama-3-Ultron-Q4_K_M.gguf
 - !!merge <<: *llama3
   name: "llama-3-lewdplay-8b-evo"
   urls:

From 0560c6fd571e105aa501bae7825a4034a59ca828 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Sat, 1 Jun 2024 12:54:37 +0200
Subject: [PATCH 56/80] models(gallery): add poppy porpoise 1.0 (#2459)

models(gallery): add poppy porpoise 1.0

Signed-off-by: Ettore Di Giacinto
---
 gallery/index.yaml | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/gallery/index.yaml b/gallery/index.yaml
index b1afb4a1..1f9eb32a 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -1275,6 +1275,34 @@
 - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf
   sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba
   uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf
+- !!merge <<: *llama3
+  name: "poppy_porpoise-v1.0-l3-8b-iq-imatrix"
+  urls:
+    - https://huggingface.co/Lewdiculous/Poppy_Porpoise-1.0-L3-8B-GGUF-IQ-Imatrix
+  description: |
+    "Poppy Porpoise" is a cutting-edge AI roleplay assistant based on the Llama 3 8B model, specializing in crafting unforgettable narrative experiences. With its advanced language capabilities, Poppy expertly immerses users in an interactive and engaging adventure, tailoring each adventure to their individual preferences.
+
+    Update: Vision/multimodal capabilities again!
+ icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/Boje781GkTdYgORTYGI6r.png + tags: + - llm + - multimodal + - gguf + - gpu + - llama3 + - cpu + - llava-1.5 + overrides: + mmproj: Llama-3-Update-2.0-mmproj-model-f16.gguf + parameters: + model: Poppy_Porpoise-1.0-L3-8B-Q4_K_M-imat.gguf + files: + - filename: Poppy_Porpoise-1.0-L3-8B-Q4_K_M-imat.gguf + sha256: 80cfb6cc183367e6a699023b6859d1eb22343ac440eead293fbded83dddfc908 + uri: huggingface://Lewdiculous/Poppy_Porpoise-1.0-L3-8B-GGUF-IQ-Imatrix/Poppy_Porpoise-1.0-L3-8B-Q4_K_M-imat.gguf + - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf + sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba + uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf - !!merge <<: *llama3 name: "bunny-llama-3-8b-v" urls: From 13cfa6de0aff8dcc2e61f0fa8b88f1f9e091efb6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 1 Jun 2024 12:54:58 +0200 Subject: [PATCH 57/80] models(gallery): add Neural SOVLish Devil (#2460) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 1f9eb32a..4d12d623 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1218,6 +1218,20 @@ - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf +- !!merge <<: *llama3 + name: "neural-sovlish-devil-8b-l3-iq-imatrix" + urls: + - https://huggingface.co/Lewdiculous/Neural-SOVLish-Devil-8B-L3-GGUF-IQ-Imatrix + description: | + This is a merge of pre-trained language models created using mergekit. 
+ icon: https://cdn-uploads.huggingface.co/production/uploads/65d4cf2693a0a3744a27536c/pJHgfEo9y-SM9-25kCRBd.png + overrides: + parameters: + model: Neural-SOVLish-Devil-8B-L3-Q4_K_M-imat.gguf + files: + - filename: Neural-SOVLish-Devil-8B-L3-Q4_K_M-imat.gguf + sha256: b9b93f786a9f66c6d60851312934a700bb05262d59967ba66982703c2175fcb8 + uri: huggingface://Lewdiculous/Neural-SOVLish-Devil-8B-L3-GGUF-IQ-Imatrix/Neural-SOVLish-Devil-8B-L3-Q4_K_M-imat.gguf - !!merge <<: *llama3 name: "neuraldaredevil-8b-abliterated" urls: From c603b95ac7718d55dc9f198ce8590178363ffe88 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 1 Jun 2024 18:59:15 +0200 Subject: [PATCH 58/80] ci: pin build-time protoc (#2461) ci: pin protoc Signed-off-by: Ettore Di Giacinto --- .github/workflows/release.yaml | 12 ++++++------ .github/workflows/test.yml | 4 ++-- Dockerfile | 4 ++-- docs/content/docs/getting-started/build.md | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 45e981a6..618c81a3 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -57,8 +57,8 @@ jobs: - name: Build id: build run: | - go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest - go install google.golang.org/protobuf/cmd/protoc-gen-go@latest + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b + go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0 export PATH=$PATH:$GOPATH/bin export PATH=/usr/local/cuda/bin:$PATH GO_TAGS=p2p make dist @@ -88,8 +88,8 @@ jobs: run: | sudo apt-get update sudo apt-get install -y --no-install-recommends libopencv-dev protobuf-compiler ccache - go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest - go install google.golang.org/protobuf/cmd/protoc-gen-go@latest + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b + go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0 - name: Build stablediffusion run: | export PATH=$PATH:$GOPATH/bin @@ -122,8 +122,8 @@ jobs: - name: Dependencies run: | brew install protobuf grpc - go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest - go install google.golang.org/protobuf/cmd/protoc-gen-go@latest + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b + go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0 - name: Build id: build run: | diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index be704187..19bf3ccd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -93,8 +93,8 @@ jobs: sudo apt-get install -y cuda-nvcc-${CUDA_VERSION} libcublas-dev-${CUDA_VERSION} export CUDACXX=/usr/local/cuda/bin/nvcc - go install google.golang.org/protobuf/cmd/protoc-gen-go@latest - go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest + go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0 + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b # The python3-grpc-tools package in 22.04 is too old pip install --user grpcio-tools diff --git a/Dockerfile b/Dockerfile index 74e97934..60df78d1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,8 +33,8 @@ RUN curl -L -s https://go.dev/dl/go${GO_VERSION}.linux-${TARGETARCH}.tar.gz | ta ENV PATH $PATH:/root/go/bin:/usr/local/go/bin # Install grpc compilers -RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@latest && \ - go 
install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest +RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0 && \ + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b COPY --chmod=644 custom-ca-certs/* /usr/local/share/ca-certificates/ RUN update-ca-certificates diff --git a/docs/content/docs/getting-started/build.md b/docs/content/docs/getting-started/build.md index 1cbe11df..8f8cf09f 100644 --- a/docs/content/docs/getting-started/build.md +++ b/docs/content/docs/getting-started/build.md @@ -55,8 +55,8 @@ apt install cmake golang libgrpc-dev make protobuf-compiler-grpc python3-grpc-to After you have golang installed and working, you can install the required binaries for compiling the golang protobuf components via the following commands ```bash -go install google.golang.org/protobuf/cmd/protoc-gen-go@latest -go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest +go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.34.0 +go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@8ba23be9613c672d40ae261d2a1335d639bdd59b ``` From 95c65d67f54e073cd231c5d601d38c758d227851 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sat, 1 Jun 2024 20:04:03 +0200 Subject: [PATCH 59/80] models(gallery): add all whisper variants (#2462) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 154 +++++++++++++++++++++++++++++++++++++- gallery/whisper-base.yaml | 7 -- 2 files changed, 153 insertions(+), 8 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index 4d12d623..bc6bb281 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2075,14 +2075,166 @@ uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors sha256: 879db523c30d3b9017143d56705015e15a2cb5628762c11d086fed9538abd7fd ## Whisper -- url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" +- &whisper + url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" name: "whisper-1" license: "MIT" urls: - https://github.com/ggerganov/whisper.cpp - https://huggingface.co/ggerganov/whisper.cpp + overrides: + parameters: + model: ggml-whisper-base.bin + files: + - filename: "ggml-whisper-base.bin" + sha256: "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe" + uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" description: | Port of OpenAI's Whisper model in C/C++ +- !!merge <<: *whisper + name: "whisper-base-q5_1" + overrides: + parameters: + model: ggml-model-whisper-base-q5_1.bin + files: + - filename: "ggml-model-whisper-base-q5_1.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-base-q5_1.bin" +- !!merge <<: *whisper + name: "whisper-base" + overrides: + parameters: + model: ggml-model-whisper-base.bin + files: + - filename: "ggml-model-whisper-base.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-base.bin" +- !!merge <<: *whisper + name: "whisper-base-en-q5_1" + overrides: + parameters: + model: ggml-model-whisper-base.en-q5_1.bin + files: + - filename: "ggml-model-whisper-base.en-q5_1.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-base.en-q5_1.bin" +- !!merge <<: *whisper + name: "whisper-base-en" + overrides: + parameters: + model: ggml-model-whisper-base.en.bin + files: + - filename: "ggml-model-whisper-base.en.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-base.en.bin" +- !!merge <<: *whisper + name: "whisper-large-q5_0" + overrides: + parameters: + model: 
ggml-model-whisper-large-q5_0.bin + files: + - filename: "ggml-model-whisper-large-q5_0.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-large-q5_0.bin" +- !!merge <<: *whisper + name: "whisper-medium-q5_0" + overrides: + parameters: + model: ggml-model-whisper-medium-q5_0.bin + files: + - filename: "ggml-model-whisper-medium-q5_0.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-medium-q5_0.bin" +- !!merge <<: *whisper + name: "whisper-small-q5_1" + overrides: + parameters: + model: ggml-model-whisper-small-q5_1.bin + files: + - filename: "ggml-model-whisper-small-q5_1.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-small-q5_1.bin" +- !!merge <<: *whisper + name: "whisper-small" + overrides: + parameters: + model: ggml-model-whisper-small.bin + files: + - filename: "ggml-model-whisper-small.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-small.bin" +- !!merge <<: *whisper + name: "whisper-small-en-q5_1" + overrides: + parameters: + model: ggml-model-whisper-small.en-q5_1.bin + files: + - filename: "ggml-model-whisper-small.en-q5_1.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-small.en-q5_1.bin" +- !!merge <<: *whisper + name: "whisper-small" + overrides: + parameters: + model: ggml-model-whisper-small.en.bin + files: + - filename: "ggml-model-whisper-small.en.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-small.en.bin" +- !!merge <<: *whisper + name: "whisper-small-q5_1" + overrides: + parameters: + model: ggml-model-whisper-small-q5_1.bin + files: + - filename: "ggml-model-whisper-small-q5_1.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-small-q5_1.bin" +- !!merge <<: *whisper + name: "whisper-tiny" + overrides: + parameters: + model: ggml-model-whisper-tiny.bin + files: + - filename: "ggml-model-whisper-tiny.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.bin" +- !!merge <<: *whisper + name: "whisper-tiny-q5_1" + overrides: + parameters: + model: ggml-model-whisper-tiny-q5_1.bin + files: + - filename: "ggml-model-whisper-tiny-q5_1.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny-q5_1.bin" +- !!merge <<: *whisper + name: "whisper-tiny-en-q5_1" + overrides: + parameters: + model: ggml-model-whisper-tiny.en-q5_1.bin + files: + - filename: "ggml-model-whisper-tiny.en-q5_1.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q5_1.bin" +- !!merge <<: *whisper + name: "whisper-tiny-en" + overrides: + parameters: + model: ggml-model-whisper-tiny.en.bin + files: + - filename: "ggml-model-whisper-tiny.en.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en.bin" +- !!merge <<: *whisper + name: "whisper-tiny-en-q8_0" + overrides: + parameters: + model: ggml-model-whisper-tiny.en-q8_0.bin + files: + - filename: "ggml-model-whisper-tiny.en-q8_0.bin" + sha256: "" + uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin" ## Bert embeddings - url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master" name: "bert-embeddings" diff --git a/gallery/whisper-base.yaml b/gallery/whisper-base.yaml index 2dc24d6e..9d68c776 100644 --- a/gallery/whisper-base.yaml +++ b/gallery/whisper-base.yaml @@ -3,10 +3,3 @@ name: "whisper-base" config_file: | backend: whisper - parameters: - model: ggml-whisper-base.bin - -files: - - filename: "ggml-whisper-base.bin" - sha256: 
"60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe" - uri: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin" From b99182c8d4424cb1f77bbc36628af432525ef7ee Mon Sep 17 00:00:00 2001 From: Chakib Benziane Date: Sat, 1 Jun 2024 20:26:27 +0200 Subject: [PATCH 60/80] TTS API improvements (#2308) * update doc on COQUI_LANGUAGE env variable Signed-off-by: blob42 * return errors from tts gRPC backend Signed-off-by: blob42 * handle speaker_id and language in coqui TTS backend Signed-off-by: blob42 * TTS endpoint: add optional language paramter Signed-off-by: blob42 * tts fix: empty language string breaks non-multilingual models Signed-off-by: blob42 * allow tts param definition in config file - consolidate TTS options under `tts` config entry Signed-off-by: blob42 * tts: update doc Signed-off-by: blob42 --------- Signed-off-by: blob42 Co-authored-by: Ettore Di Giacinto --- Makefile | 2 +- backend/backend.proto | 1 + backend/python/coqui/backend.py | 16 ++- core/backend/tts.go | 17 ++- core/cli/tts.go | 3 +- core/config/backend_config.go | 13 ++- core/http/endpoints/elevenlabs/tts.go | 2 +- core/http/endpoints/localai/tts.go | 22 +++- core/schema/localai.go | 120 ++++++++++---------- docs/content/docs/features/text-to-audio.md | 48 ++++++-- 10 files changed, 166 insertions(+), 78 deletions(-) diff --git a/Makefile b/Makefile index 20a5f2e0..71ce394f 100644 --- a/Makefile +++ b/Makefile @@ -447,7 +447,7 @@ protogen-clean: protogen-go-clean protogen-python-clean .PHONY: protogen-go protogen-go: mkdir -p pkg/grpc/proto - protoc -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \ + protoc --experimental_allow_proto3_optional -Ibackend/ --go_out=pkg/grpc/proto/ --go_opt=paths=source_relative --go-grpc_out=pkg/grpc/proto/ --go-grpc_opt=paths=source_relative \ backend/backend.proto .PHONY: protogen-go-clean diff --git a/backend/backend.proto b/backend/backend.proto index cb87fe02..aec0c00e 100644 --- a/backend/backend.proto +++ b/backend/backend.proto @@ -266,6 +266,7 @@ message TTSRequest { string model = 2; string dst = 3; string voice = 4; + optional string language = 5; } message TokenizationResponse { diff --git a/backend/python/coqui/backend.py b/backend/python/coqui/backend.py index c6432208..02ab56f4 100644 --- a/backend/python/coqui/backend.py +++ b/backend/python/coqui/backend.py @@ -66,7 +66,21 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): def TTS(self, request, context): try: - self.tts.tts_to_file(text=request.text, speaker_wav=self.AudioPath, language=COQUI_LANGUAGE, file_path=request.dst) + # if model is multilangual add language from request or env as fallback + lang = request.language or COQUI_LANGUAGE + if lang == "": + lang = None + if self.tts.is_multi_lingual and lang is None: + return backend_pb2.Result(success=False, message=f"Model is multi-lingual, but no language was provided") + + # if model is multi-speaker, use speaker_wav or the speaker_id from request.voice + if self.tts.is_multi_speaker and self.AudioPath is None and request.voice is None: + return backend_pb2.Result(success=False, message=f"Model is multi-speaker, but no speaker was provided") + + if self.tts.is_multi_speaker and request.voice is not None: + self.tts.tts_to_file(text=request.text, speaker=request.voice, language=lang, file_path=request.dst) + else: + self.tts.tts_to_file(text=request.text, speaker_wav=self.AudioPath, language=lang, file_path=request.dst) except Exception as 
err: return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}") return backend_pb2.Result(success=True) diff --git a/core/backend/tts.go b/core/backend/tts.go index 4532cf00..b1c23ebb 100644 --- a/core/backend/tts.go +++ b/core/backend/tts.go @@ -29,7 +29,16 @@ func generateUniqueFileName(dir, baseName, ext string) string { } } -func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (string, *proto.Result, error) { +func ModelTTS( + backend, + text, + modelFile, + voice , + language string, + loader *model.ModelLoader, + appConfig *config.ApplicationConfig, + backendConfig config.BackendConfig, +) (string, *proto.Result, error) { bb := backend if bb == "" { bb = model.PiperBackend @@ -83,7 +92,13 @@ func ModelTTS(backend, text, modelFile, voice string, loader *model.ModelLoader, Model: modelPath, Voice: voice, Dst: filePath, + Language: &language, }) + // return RPC error if any + if !res.Success { + return "", nil, fmt.Errorf(res.Message) + } + return filePath, res, err } diff --git a/core/cli/tts.go b/core/cli/tts.go index 8b54ed28..cbba0fc5 100644 --- a/core/cli/tts.go +++ b/core/cli/tts.go @@ -20,6 +20,7 @@ type TTSCMD struct { Backend string `short:"b" default:"piper" help:"Backend to run the TTS model"` Model string `short:"m" required:"" help:"Model name to run the TTS"` Voice string `short:"v" help:"Voice name to run the TTS"` + Language string `short:"l" help:"Language to use with the TTS"` OutputFile string `short:"o" type:"path" help:"The path to write the output wav file"` ModelsPath string `env:"LOCALAI_MODELS_PATH,MODELS_PATH" type:"path" default:"${basepath}/models" help:"Path containing models used for inferencing" group:"storage"` BackendAssetsPath string `env:"LOCALAI_BACKEND_ASSETS_PATH,BACKEND_ASSETS_PATH" type:"path" default:"/tmp/localai/backend_data" help:"Path used to extract libraries that are required by some of the backends in runtime" group:"storage"` @@ -52,7 +53,7 @@ func (t *TTSCMD) Run(ctx *cliContext.Context) error { options := config.BackendConfig{} options.SetDefaults() - filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, ml, opts, options) + filePath, _, err := backend.ModelTTS(t.Backend, text, t.Model, t.Voice, t.Language, ml, opts, options) if err != nil { return err } diff --git a/core/config/backend_config.go b/core/config/backend_config.go index eda66360..1ca11716 100644 --- a/core/config/backend_config.go +++ b/core/config/backend_config.go @@ -15,6 +15,15 @@ const ( RAND_SEED = -1 ) +type TTSConfig struct { + + // Voice wav path or id + Voice string `yaml:"voice"` + + // Vall-e-x + VallE VallE `yaml:"vall-e"` +} + type BackendConfig struct { schema.PredictionOptions `yaml:"parameters"` Name string `yaml:"name"` @@ -49,8 +58,8 @@ type BackendConfig struct { // GRPC Options GRPC GRPC `yaml:"grpc"` - // Vall-e-x - VallE VallE `yaml:"vall-e"` + // TTS specifics + TTSConfig `yaml:"tts"` // CUDA // Explicitly enable CUDA or not (some backends might need it) diff --git a/core/http/endpoints/elevenlabs/tts.go b/core/http/endpoints/elevenlabs/tts.go index 841f9b5f..e7bfe0f7 100644 --- a/core/http/endpoints/elevenlabs/tts.go +++ b/core/http/endpoints/elevenlabs/tts.go @@ -52,7 +52,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi } log.Debug().Msgf("Request for model: %s", modelFile) - filePath, _, err := backend.ModelTTS(cfg.Backend, input.Text, modelFile, voiceID, ml, 
appConfig, *cfg) + filePath, _, err := backend.ModelTTS(cfg.Backend, input.Text, modelFile, "", voiceID, ml, appConfig, *cfg) if err != nil { return err } diff --git a/core/http/endpoints/localai/tts.go b/core/http/endpoints/localai/tts.go index 7822e024..4e5a1b5b 100644 --- a/core/http/endpoints/localai/tts.go +++ b/core/http/endpoints/localai/tts.go @@ -12,10 +12,13 @@ import ( ) // TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech -// @Summary Generates audio from the input text. -// @Param request body schema.TTSRequest true "query params" -// @Success 200 {string} binary "Response" -// @Router /v1/audio/speech [post] +// @Summary Generates audio from the input text. +// @Accept json +// @Produce audio/x-wav +// @Param request body schema.TTSRequest true "query params" +// @Success 200 {string} binary "generated audio/wav file" +// @Router /v1/audio/speech [post] +// @Router /tts [post] func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error { return func(c *fiber.Ctx) error { @@ -40,6 +43,7 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi ) if err != nil { + log.Err(err) modelFile = input.Model log.Warn().Msgf("Model not found in context: %s", input.Model) } else { @@ -51,7 +55,15 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi cfg.Backend = input.Backend } - filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, input.Voice, ml, appConfig, *cfg) + if input.Language != "" { + cfg.Language = input.Language + } + + if input.Voice != "" { + cfg.Voice = input.Voice + } + + filePath, _, err := backend.ModelTTS(cfg.Backend, input.Input, modelFile, cfg.Voice, cfg.Language, ml, appConfig, *cfg) if err != nil { return err } diff --git a/core/schema/localai.go b/core/schema/localai.go index e9b61cf3..9bbfe28b 100644 --- a/core/schema/localai.go +++ b/core/schema/localai.go @@ -1,59 +1,61 @@ -package schema - -import ( - gopsutil "github.com/shirou/gopsutil/v3/process" -) - -type BackendMonitorRequest struct { - Model string `json:"model" yaml:"model"` -} - -type BackendMonitorResponse struct { - MemoryInfo *gopsutil.MemoryInfoStat - MemoryPercent float32 - CPUPercent float64 -} - -type TTSRequest struct { - Model string `json:"model" yaml:"model"` - Input string `json:"input" yaml:"input"` - Voice string `json:"voice" yaml:"voice"` - Backend string `json:"backend" yaml:"backend"` -} - -type StoresSet struct { - Store string `json:"store,omitempty" yaml:"store,omitempty"` - - Keys [][]float32 `json:"keys" yaml:"keys"` - Values []string `json:"values" yaml:"values"` -} - -type StoresDelete struct { - Store string `json:"store,omitempty" yaml:"store,omitempty"` - - Keys [][]float32 `json:"keys"` -} - -type StoresGet struct { - Store string `json:"store,omitempty" yaml:"store,omitempty"` - - Keys [][]float32 `json:"keys" yaml:"keys"` -} - -type StoresGetResponse struct { - Keys [][]float32 `json:"keys" yaml:"keys"` - Values []string `json:"values" yaml:"values"` -} - -type StoresFind struct { - Store string `json:"store,omitempty" yaml:"store,omitempty"` - - Key []float32 `json:"key" yaml:"key"` - Topk int `json:"topk" yaml:"topk"` -} - -type StoresFindResponse struct { - Keys [][]float32 `json:"keys" yaml:"keys"` - Values []string `json:"values" yaml:"values"` - Similarities []float32 `json:"similarities" yaml:"similarities"` -} +package schema + +import ( + gopsutil 
"github.com/shirou/gopsutil/v3/process" +) + +type BackendMonitorRequest struct { + Model string `json:"model" yaml:"model"` +} + +type BackendMonitorResponse struct { + MemoryInfo *gopsutil.MemoryInfoStat + MemoryPercent float32 + CPUPercent float64 +} + +// @Description TTS request body +type TTSRequest struct { + Model string `json:"model" yaml:"model"` // model name or full path + Input string `json:"input" yaml:"input"` // text input + Voice string `json:"voice" yaml:"voice"` // voice audio file or speaker id + Backend string `json:"backend" yaml:"backend"` + Language string `json:"language,omitempty" yaml:"language,omitempty"` // (optional) language to use with TTS model +} + +type StoresSet struct { + Store string `json:"store,omitempty" yaml:"store,omitempty"` + + Keys [][]float32 `json:"keys" yaml:"keys"` + Values []string `json:"values" yaml:"values"` +} + +type StoresDelete struct { + Store string `json:"store,omitempty" yaml:"store,omitempty"` + + Keys [][]float32 `json:"keys"` +} + +type StoresGet struct { + Store string `json:"store,omitempty" yaml:"store,omitempty"` + + Keys [][]float32 `json:"keys" yaml:"keys"` +} + +type StoresGetResponse struct { + Keys [][]float32 `json:"keys" yaml:"keys"` + Values []string `json:"values" yaml:"values"` +} + +type StoresFind struct { + Store string `json:"store,omitempty" yaml:"store,omitempty"` + + Key []float32 `json:"key" yaml:"key"` + Topk int `json:"topk" yaml:"topk"` +} + +type StoresFindResponse struct { + Keys [][]float32 `json:"keys" yaml:"keys"` + Values []string `json:"values" yaml:"values"` + Similarities []float32 `json:"similarities" yaml:"similarities"` +} diff --git a/docs/content/docs/features/text-to-audio.md b/docs/content/docs/features/text-to-audio.md index ebfdda1d..0e82f7f0 100644 --- a/docs/content/docs/features/text-to-audio.md +++ b/docs/content/docs/features/text-to-audio.md @@ -46,6 +46,10 @@ Coqui works without any configuration, to test it, you can run the following cur }' ``` +You can use the env variable COQUI_LANGUAGE to set the language used by the coqui backend. + +You can also use config files to configure tts models (see section below on how to use config files). + ### Bark [Bark](https://github.com/suno-ai/bark) allows to generate audio from text prompts. @@ -148,11 +152,12 @@ name: cloned-voice backend: vall-e-x parameters: model: "cloned-voice" -vall-e: - # The path to the audio file to be cloned - # relative to the models directory - # Max 15s - audio_path: "audio-sample.wav" +tts: + vall-e: + # The path to the audio file to be cloned + # relative to the models directory + # Max 15s + audio_path: "audio-sample.wav" ``` Then you can specify the model name in the requests: @@ -164,6 +169,35 @@ curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{ }' | aplay ``` -## Parler-tts +### Parler-tts -`parler-tts`. It is possible to install and configure the model directly from the gallery. https://github.com/huggingface/parler-tts \ No newline at end of file +`parler-tts`. It is possible to install and configure the model directly from the gallery. https://github.com/huggingface/parler-tts + + +## Using config files + +You can also use a `config-file` to specify TTS models and their parameters. + +In the following example we define a custom config to load the `xtts_v2` model, and specify a voice and language. 
+ +```yaml + +name: xtts_v2 +backend: coqui +parameters: + language: fr + model: tts_models/multilingual/multi-dataset/xtts_v2 + +tts: + voice: Ana Florence +``` + +With this config, you can now use the following curl command to generate a text-to-speech audio file: +```bash +curl -L http://localhost:8080/tts \ + -H "Content-Type: application/json" \ + -d '{ +"model": "xtts_v2", +"input": "Bonjour, je suis Ana Florence. Comment puis-je vous aider?" +}' | aplay +``` From fb0f188c93043a487438935dd2edc451b8416b06 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 2 Jun 2024 00:04:01 +0200 Subject: [PATCH 61/80] feat(swagger): update swagger (#2464) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- swagger/docs.go | 46 +++++++++++++++++++++++++++++++++++++++++++- swagger/swagger.json | 46 +++++++++++++++++++++++++++++++++++++++++++- swagger/swagger.yaml | 32 +++++++++++++++++++++++++++++- 3 files changed, 121 insertions(+), 3 deletions(-) diff --git a/swagger/docs.go b/swagger/docs.go index 29e04af6..f48b9661 100644 --- a/swagger/docs.go +++ b/swagger/docs.go @@ -22,6 +22,36 @@ const docTemplate = `{ "host": "{{.Host}}", "basePath": "{{.BasePath}}", "paths": { + "/tts": { + "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "audio/x-wav" + ], + "summary": "Generates audio from the input text.", + "parameters": [ + { + "description": "query params", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.TTSRequest" + } + } + ], + "responses": { + "200": { + "description": "generated audio/wav file", + "schema": { + "type": "string" + } + } + } + } + }, "/v1/assistants": { "post": { "summary": "Create an assistant with a model and instructions.", @@ -48,6 +78,12 @@ const docTemplate = `{ }, "/v1/audio/speech": { "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "audio/x-wav" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -62,7 +98,7 @@ const docTemplate = `{ ], "responses": { "200": { - "description": "Response", + "description": "generated audio/wav file", "schema": { "type": "string" } @@ -771,18 +807,26 @@ const docTemplate = `{ } }, "schema.TTSRequest": { + "description": "TTS request body", "type": "object", "properties": { "backend": { "type": "string" }, "input": { + "description": "text input", + "type": "string" + }, + "language": { + "description": "(optional) language to use with TTS model", "type": "string" }, "model": { + "description": "model name or full path", "type": "string" }, "voice": { + "description": "voice audio file or speaker id", "type": "string" } } diff --git a/swagger/swagger.json b/swagger/swagger.json index 1933da3a..1eba0ff3 100644 --- a/swagger/swagger.json +++ b/swagger/swagger.json @@ -15,6 +15,36 @@ }, "basePath": "/", "paths": { + "/tts": { + "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "audio/x-wav" + ], + "summary": "Generates audio from the input text.", + "parameters": [ + { + "description": "query params", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.TTSRequest" + } + } + ], + "responses": { + "200": { + "description": "generated audio/wav file", + "schema": { + "type": "string" + } + } + } + } + }, "/v1/assistants": { "post": { "summary": "Create an assistant with a model and instructions.", 
@@ -41,6 +71,12 @@ }, "/v1/audio/speech": { "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "audio/x-wav" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -55,7 +91,7 @@ ], "responses": { "200": { - "description": "Response", + "description": "generated audio/wav file", "schema": { "type": "string" } @@ -764,18 +800,26 @@ } }, "schema.TTSRequest": { + "description": "TTS request body", "type": "object", "properties": { "backend": { "type": "string" }, "input": { + "description": "text input", + "type": "string" + }, + "language": { + "description": "(optional) language to use with TTS model", "type": "string" }, "model": { + "description": "model name or full path", "type": "string" }, "voice": { + "description": "voice audio file or speaker id", "type": "string" } } diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml index 33ce0b78..db4ef52f 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -367,14 +367,21 @@ definitions: type: integer type: object schema.TTSRequest: + description: TTS request body properties: backend: type: string input: + description: text input + type: string + language: + description: (optional) language to use with TTS model type: string model: + description: model name or full path type: string voice: + description: voice audio file or speaker id type: string type: object schema.ToolCall: @@ -399,6 +406,25 @@ info: title: LocalAI API version: 2.0.0 paths: + /tts: + post: + consumes: + - application/json + parameters: + - description: query params + in: body + name: request + required: true + schema: + $ref: '#/definitions/schema.TTSRequest' + produces: + - audio/x-wav + responses: + "200": + description: generated audio/wav file + schema: + type: string + summary: Generates audio from the input text. /v1/assistants: post: parameters: @@ -416,6 +442,8 @@ paths: summary: Create an assistant with a model and instructions. /v1/audio/speech: post: + consumes: + - application/json parameters: - description: query params in: body @@ -423,9 +451,11 @@ paths: required: true schema: $ref: '#/definitions/schema.TTSRequest' + produces: + - audio/x-wav responses: "200": - description: Response + description: generated audio/wav file schema: type: string summary: Generates audio from the input text. 
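Taken together, the swagger update above and the TTS patch before it define the full JSON contract for speech generation. As a rough illustration of how a client might consume it, here is a Go sketch: the routes, request fields, and audio/x-wav response type come from the swagger definitions, while the server address and the model name are assumptions taken from the documentation examples earlier in the series, not from this patch.

```go
package main

import (
	"bytes"
	"encoding/json"
	"io"
	"log"
	"net/http"
	"os"
)

// TTSRequest loosely mirrors schema.TTSRequest from the patch above;
// omitempty is added here for brevity and is not part of the original tags.
type TTSRequest struct {
	Model    string `json:"model"`
	Input    string `json:"input"`
	Voice    string `json:"voice,omitempty"`
	Backend  string `json:"backend,omitempty"`
	Language string `json:"language,omitempty"`
}

func main() {
	body, err := json.Marshal(TTSRequest{
		Model:    "xtts_v2", // hypothetical model name, borrowed from the docs example
		Input:    "Hello from LocalAI",
		Language: "en",
	})
	if err != nil {
		log.Fatal(err)
	}

	// localhost:8080 is an assumption; the docs examples use the same address.
	resp, err := http.Post("http://localhost:8080/v1/audio/speech", "application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	wav, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}
	if err := os.WriteFile("out.wav", wav, 0o644); err != nil {
		log.Fatal(err)
	}
}
```

The same request body also works against the `/tts` route, which the swagger update documents with the same schema.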
From b588cae70efb6ba644d49a074a2c34fc1cb156e1 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 2 Jun 2024 00:31:32 +0200 Subject: [PATCH 62/80] :arrow_up: Update ggerganov/llama.cpp (#2465) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 71ce394f..a8df4e43 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=a323ec60af14a33d560df98f2cc41b4112cb4f80 +CPPLLAMA_VERSION?=2e666832e6ac78194edf030bd1c295e21bdb022c # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From c9092ad39c627ba6e5b085da45eb67233a0f9938 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 2 Jun 2024 01:13:02 +0200 Subject: [PATCH 63/80] models(gallery): :arrow_up: update checksum (#2463) :arrow_up: Checksum updates in gallery/index.yaml Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- gallery/index.yaml | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/gallery/index.yaml b/gallery/index.yaml index bc6bb281..89947341 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -2074,8 +2074,8 @@ - filename: DreamShaper_8_pruned.safetensors uri: huggingface://Lykon/DreamShaper/DreamShaper_8_pruned.safetensors sha256: 879db523c30d3b9017143d56705015e15a2cb5628762c11d086fed9538abd7fd -## Whisper - &whisper + ## Whisper url: "github:mudler/LocalAI/gallery/whisper-base.yaml@master" name: "whisper-1" license: "MIT" @@ -2098,8 +2098,8 @@ model: ggml-model-whisper-base-q5_1.bin files: - filename: "ggml-model-whisper-base-q5_1.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-base-q5_1.bin" + sha256: 422f1ae452ade6f30a004d7e5c6a43195e4433bc370bf23fac9cc591f01a8898 - !!merge <<: *whisper name: "whisper-base" overrides: @@ -2107,8 +2107,8 @@ model: ggml-model-whisper-base.bin files: - filename: "ggml-model-whisper-base.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-base.bin" + sha256: 60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe - !!merge <<: *whisper name: "whisper-base-en-q5_1" overrides: @@ -2116,8 +2116,8 @@ model: ggml-model-whisper-base.en-q5_1.bin files: - filename: "ggml-model-whisper-base.en-q5_1.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-base.en-q5_1.bin" + sha256: 4baf70dd0d7c4247ba2b81fafd9c01005ac77c2f9ef064e00dcf195d0e2fdd2f - !!merge <<: *whisper name: "whisper-base-en" overrides: @@ -2125,8 +2125,8 @@ model: ggml-model-whisper-base.en.bin files: - filename: "ggml-model-whisper-base.en.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-base.en.bin" + sha256: a03779c86df3323075f5e796cb2ce5029f00ec8869eee3fdfb897afe36c6d002 - !!merge <<: *whisper name: "whisper-large-q5_0" overrides: @@ -2134,8 +2134,8 @@ model: ggml-model-whisper-large-q5_0.bin files: - filename: "ggml-model-whisper-large-q5_0.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-large-q5_0.bin" + sha256: 3a214837221e4530dbc1fe8d734f302af393eb30bd0ed046042ebf4baf70f6f2 - !!merge <<: *whisper name: 
"whisper-medium-q5_0" overrides: @@ -2143,8 +2143,8 @@ model: ggml-model-whisper-medium-q5_0.bin files: - filename: "ggml-model-whisper-medium-q5_0.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-medium-q5_0.bin" + sha256: 19fea4b380c3a618ec4723c3eef2eb785ffba0d0538cf43f8f235e7b3b34220f - !!merge <<: *whisper name: "whisper-small-q5_1" overrides: @@ -2152,8 +2152,8 @@ model: ggml-model-whisper-small-q5_1.bin files: - filename: "ggml-model-whisper-small-q5_1.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-small-q5_1.bin" + sha256: ae85e4a935d7a567bd102fe55afc16bb595bdb618e11b2fc7591bc08120411bb - !!merge <<: *whisper name: "whisper-small" overrides: @@ -2161,8 +2161,8 @@ model: ggml-model-whisper-small.bin files: - filename: "ggml-model-whisper-small.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-small.bin" + sha256: 1be3a9b2063867b937e64e2ec7483364a79917e157fa98c5d94b5c1fffea987b - !!merge <<: *whisper name: "whisper-small-en-q5_1" overrides: @@ -2170,8 +2170,8 @@ model: ggml-model-whisper-small.en-q5_1.bin files: - filename: "ggml-model-whisper-small.en-q5_1.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-small.en-q5_1.bin" + sha256: bfdff4894dcb76bbf647d56263ea2a96645423f1669176f4844a1bf8e478ad30 - !!merge <<: *whisper name: "whisper-small" overrides: @@ -2179,8 +2179,8 @@ model: ggml-model-whisper-small.en.bin files: - filename: "ggml-model-whisper-small.en.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-small.en.bin" + sha256: c6138d6d58ecc8322097e0f987c32f1be8bb0a18532a3f88f734d1bbf9c41e5d - !!merge <<: *whisper name: "whisper-small-q5_1" overrides: @@ -2188,8 +2188,8 @@ model: ggml-model-whisper-small-q5_1.bin files: - filename: "ggml-model-whisper-small-q5_1.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-small-q5_1.bin" + sha256: ae85e4a935d7a567bd102fe55afc16bb595bdb618e11b2fc7591bc08120411bb - !!merge <<: *whisper name: "whisper-tiny" overrides: @@ -2197,8 +2197,8 @@ model: ggml-model-whisper-tiny.bin files: - filename: "ggml-model-whisper-tiny.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.bin" + sha256: be07e048e1e599ad46341c8d2a135645097a538221678b7acdd1b1919c6e1b21 - !!merge <<: *whisper name: "whisper-tiny-q5_1" overrides: @@ -2206,8 +2206,8 @@ model: ggml-model-whisper-tiny-q5_1.bin files: - filename: "ggml-model-whisper-tiny-q5_1.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny-q5_1.bin" + sha256: 818710568da3ca15689e31a743197b520007872ff9576237bda97bd1b469c3d7 - !!merge <<: *whisper name: "whisper-tiny-en-q5_1" overrides: @@ -2215,8 +2215,8 @@ model: ggml-model-whisper-tiny.en-q5_1.bin files: - filename: "ggml-model-whisper-tiny.en-q5_1.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q5_1.bin" + sha256: c77c5766f1cef09b6b7d47f21b546cbddd4157886b3b5d6d4f709e91e66c7c2b - !!merge <<: *whisper name: "whisper-tiny-en" overrides: @@ -2224,8 +2224,8 @@ model: ggml-model-whisper-tiny.en.bin files: - filename: "ggml-model-whisper-tiny.en.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en.bin" + sha256: 921e4cf8686fdd993dcd081a5da5b6c365bfde1162e72b08d75ac75289920b1f - !!merge <<: *whisper name: "whisper-tiny-en-q8_0" overrides: @@ -2233,8 +2233,8 @@ model: ggml-model-whisper-tiny.en-q8_0.bin files: - filename: "ggml-model-whisper-tiny.en-q8_0.bin" - sha256: "" uri: "https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin" + sha256: 
5bc2b3860aa151a4c6e7bb095e1fcce7cf12c7b020ca08dcec0c6d018bb7dd94 ## Bert embeddings - url: "github:mudler/LocalAI/gallery/bert-embeddings.yaml@master" name: "bert-embeddings" From c0744899c9708c41b4d6b6f78c2a30f79d5e49a8 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 2 Jun 2024 01:15:06 +0200 Subject: [PATCH 64/80] models(gallery): add gemma-2b (#2466) Signed-off-by: Ettore Di Giacinto --- gallery/gemma.yaml | 20 ++++++++++++++++++++ gallery/index.yaml | 22 ++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 gallery/gemma.yaml diff --git a/gallery/gemma.yaml b/gallery/gemma.yaml new file mode 100644 index 00000000..30b49a07 --- /dev/null +++ b/gallery/gemma.yaml @@ -0,0 +1,20 @@ +--- +name: "gemma" + +config_file: | + mmap: true + context_size: 8192 + template: + chat_message: |- + <start_of_turn>{{if eq .RoleName "assistant" }}model{{else}}{{ .RoleName }}{{end}} + {{ if .Content -}} + {{.Content }} + {{ end -}} + chat: | + {{.Input -}} + <start_of_turn>model + completion: | + {{.Input}} + stopwords: + - '<|im_end|>' + - '<end_of_turn>' diff --git a/gallery/index.yaml b/gallery/index.yaml index 89947341..5997c074 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -131,6 +131,28 @@ - filename: Einstein-v6.1-Llama3-8B-Q4_K_M.gguf sha256: 447587bd8f60d9050232148d34fdb2d88b15b2413fd7f8e095a4606ec60b45bf uri: huggingface://bartowski/Einstein-v6.1-Llama3-8B-GGUF/Einstein-v6.1-Llama3-8B-Q4_K_M.gguf +- &gemma + url: "github:mudler/LocalAI/gallery/gemma.yaml@master" + name: "gemma-2b" + license: gemma + urls: + - https://ai.google.dev/gemma/docs + - https://huggingface.co/mlabonne/gemma-2b-GGUF + description: | + Open source LLM from Google + tags: + - llm + - gguf + - gpu + - cpu + - gemma + overrides: + parameters: + model: gemma-2b.Q4_K_M.gguf + files: + - filename: gemma-2b.Q4_K_M.gguf + sha256: 37d50c21ef7847926204ad9b3007127d9a2722188cfd240ce7f9f7f041aa71a5 + uri: huggingface://mlabonne/gemma-2b-GGUF/gemma-2b.Q4_K_M.gguf - &llama3 url: "github:mudler/LocalAI/gallery/llama3-instruct.yaml@master" icon: https://cdn-uploads.huggingface.co/production/uploads/642cc1c253e76b4c2286c58e/aJJxKus1wP5N-euvHEUq7.png From 29ff51c12ab754d80206a6d1b63fc18aba320dc6 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 2 Jun 2024 01:26:41 +0200 Subject: [PATCH 65/80] Update gemma stopwords Signed-off-by: Ettore Di Giacinto --- gallery/gemma.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/gallery/gemma.yaml b/gallery/gemma.yaml index 30b49a07..31b0f47a 100644 --- a/gallery/gemma.yaml +++ b/gallery/gemma.yaml @@ -18,3 +18,4 @@ config_file: | stopwords: - '<|im_end|>' - '<end_of_turn>' + - '<start_of_turn>' From 77d752a481977550b7a9dd35801c09753fa7a82d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Sun, 2 Jun 2024 10:51:58 +0200 Subject: [PATCH 66/80] fix(gemma): correctly format the template Signed-off-by: Ettore Di Giacinto --- gallery/gemma.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gallery/gemma.yaml b/gallery/gemma.yaml index 31b0f47a..bff7d614 100644 --- a/gallery/gemma.yaml +++ b/gallery/gemma.yaml @@ -8,10 +8,10 @@ config_file: | chat_message: |- <start_of_turn>{{if eq .RoleName "assistant" }}model{{else}}{{ .RoleName }}{{end}} {{ if .Content -}} - {{.Content }} + {{.Content -}} {{ end -}} chat: | - {{.Input -}} + {{.Input }} <start_of_turn>model completion: | {{.Input}} From 5ddaa19914e7c33b7023c565462bc0fe57b8029a Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Sun, 2 Jun 2024 23:34:29 +0200 Subject: [PATCH 67/80] :arrow_up: Update
ggerganov/llama.cpp (#2467) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a8df4e43..89a55e6d 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=2e666832e6ac78194edf030bd1c295e21bdb022c +CPPLLAMA_VERSION?=7c4e5b7eae26581869e782015d9deca947c34997 # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 4a239a4bff9036cd5fd24cb5492a6ec384a87531 Mon Sep 17 00:00:00 2001 From: fakezeta Date: Mon, 3 Jun 2024 08:52:55 +0200 Subject: [PATCH 68/80] feat(transformers): various enhancements to the transformers backend (#2468) update transformers *Handle Temperature = 0 as greedy search *Handle custom words as stop words *Implement KV cache *Phi 3 no longer requires trust_remote_code: true --- backend/python/transformers/backend.py | 59 ++++++++++++++++---------- 1 file changed, 36 insertions(+), 23 deletions(-) mode change 100755 => 100644 backend/python/transformers/backend.py diff --git a/backend/python/transformers/backend.py b/backend/python/transformers/backend.py old mode 100755 new mode 100644 index b1e0d559..10603d2e --- a/backend/python/transformers/backend.py +++ b/backend/python/transformers/backend.py @@ -22,9 +22,9 @@ import torch.cuda XPU=os.environ.get("XPU", "0") == "1" if XPU: - from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer + from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria else: - from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig, TextIteratorStreamer + from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -246,28 +246,28 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): # Pool to get sentence embeddings; i.e.
generate one 1024 vector for the entire sentence sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask']) -# print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr) -# print("Embeddings:", sentence_embeddings, file=sys.stderr) return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings[0]) async def _predict(self, request, context, streaming=False): set_seed(request.Seed) - if request.TopP == 0: - request.TopP = 0.9 + if request.TopP < 0 or request.TopP > 1: + request.TopP = 1 - if request.TopK == 0: - request.TopK = 40 + if request.TopK <= 0: + request.TopK = 50 + + if request.Temperature > 0 : + sample=True + else: + sample=False + request.TopP == None + request.TopK == None + request.Temperature == None prompt = request.Prompt if not request.Prompt and request.UseTokenizerTemplate and request.Messages: prompt = self.tokenizer.apply_chat_template(request.Messages, tokenize=False, add_generation_prompt=True) - eos_token_id = self.tokenizer.eos_token_id - if request.StopPrompts: - eos_token_id = [] - for word in request.StopPrompts: - eos_token_id.append(self.tokenizer.convert_tokens_to_ids(word)) - inputs = self.tokenizer(prompt, return_tensors="pt") if request.Tokens > 0: @@ -281,6 +281,14 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): inputs = inputs.to("xpu") streaming = False + criteria=[] + if request.StopPrompts: + criteria = StoppingCriteriaList( + [ + StopStringCriteria(tokenizer=self.tokenizer, stop_strings=request.StopPrompts), + ] + ) + if streaming: streamer=TextIteratorStreamer(self.tokenizer, skip_prompt=True, @@ -290,11 +298,14 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): temperature=request.Temperature, top_p=request.TopP, top_k=request.TopK, - do_sample=True, + do_sample=sample, attention_mask=inputs["attention_mask"], - eos_token_id=eos_token_id, + eos_token_id=self.tokenizer.eos_token_id, pad_token_id=self.tokenizer.eos_token_id, - streamer=streamer) + streamer=streamer, + stopping_criteria=criteria, + use_cache=True, + ) thread=Thread(target=self.model.generate, kwargs=config) thread.start() generated_text = "" @@ -311,18 +322,20 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): temperature=request.Temperature, top_p=request.TopP, top_k=request.TopK, - do_sample=True, + do_sample=sample, pad_token=self.tokenizer.eos_token_id) else: - outputs = self.model.generate(inputs["input_ids"], + outputs = self.model.generate(**inputs, max_new_tokens=max_tokens, temperature=request.Temperature, top_p=request.TopP, top_k=request.TopK, - do_sample=True, - attention_mask=inputs["attention_mask"], - eos_token_id=eos_token_id, - pad_token_id=self.tokenizer.eos_token_id) + do_sample=sample, + eos_token_id=self.tokenizer.eos_token_id, + pad_token_id=self.tokenizer.eos_token_id, + stopping_criteria=criteria, + use_cache=True, + ) generated_text = self.tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0] if streaming: From 90945ebab3c21eed5b5087eef3646bff5eafefcc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 3 Jun 2024 15:44:27 +0200 Subject: [PATCH 69/80] models(gallery): add fimbulvetr iqmatrix version (#2470) Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 5997c074..0978360d 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1020,6 +1020,15 @@ - filename: Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf sha256: 
3597dacfb0ab717d565d8a4d6067f10dcb0e26cc7f21c832af1a10a87882a8fd uri: huggingface://Sao10K/Fimbulvetr-11B-v2-GGUF/Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf +- !!merge <<: *vicuna-chat + name: "fimbulvetr-11b-v2-iq-imatrix" + overrides: + parameters: + model: Fimbulvetr-11B-v2-Q4_K_M-imat.gguf + files: + - filename: Fimbulvetr-11B-v2-Q4_K_M-imat.gguf + sha256: 3f309b59508342536a70edd6c4be6cf4f2cb97f2e32cbc79ad2ab3f4c02933a4 + uri: huggingface://Lewdiculous/Fimbulvetr-11B-v2-GGUF-IQ-Imatrix/Fimbulvetr-11B-v2-Q4_K_M-imat.gguf - &noromaid ### Start noromaid url: "github:mudler/LocalAI/gallery/noromaid.yaml@master" From bae2a649fd2dc55717449ff19154cefbd8bb2916 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 3 Jun 2024 15:44:52 +0200 Subject: [PATCH 70/80] models(gallery): add new poppy porpoise versions (#2471) models(gallery): add new poppy porpoise versions Signed-off-by: Ettore Di Giacinto --- gallery/index.yaml | 56 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/gallery/index.yaml b/gallery/index.yaml index 0978360d..172d1bab 100644 --- a/gallery/index.yaml +++ b/gallery/index.yaml @@ -1348,6 +1348,62 @@ - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf +- !!merge <<: *llama3 + name: "poppy_porpoise-v1.30-l3-8b-iq-imatrix" + urls: + - https://huggingface.co/mradermacher/Poppy_Porpoise-1.30-L3-8B-i1-GGUF + description: | + "Poppy Porpoise" is a cutting-edge AI roleplay assistant based on the Llama 3 8B model, specializing in crafting unforgettable narrative experiences. With its advanced language capabilities, Poppy expertly immerses users in an interactive and engaging adventure, tailoring each adventure to their individual preferences. + + Update: Vision/multimodal capabilities again! + icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/Boje781GkTdYgORTYGI6r.png + tags: + - llm + - multimodal + - gguf + - gpu + - llama3 + - cpu + - llava-1.5 + overrides: + mmproj: Llama-3-Update-2.0-mmproj-model-f16.gguf + parameters: + model: Poppy_Porpoise-1.30-L3-8B.i1-Q4_K_M.gguf + files: + - filename: Poppy_Porpoise-1.30-L3-8B.i1-Q4_K_M.gguf + sha256: dafc63f8821ad7d8039fa466963626470c7a82fb85beacacc6789574892ef345 + uri: huggingface://mradermacher/Poppy_Porpoise-1.30-L3-8B-i1-GGUF/Poppy_Porpoise-1.30-L3-8B.i1-Q4_K_M.gguf + - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf + sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba + uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf +- !!merge <<: *llama3 + name: "poppy_porpoise-v1.4-l3-8b-iq-imatrix" + urls: + - https://huggingface.co/mradermacher/Poppy_Porpoise-1.4-L3-8B-GGUF + description: | + "Poppy Porpoise" is a cutting-edge AI roleplay assistant based on the Llama 3 8B model, specializing in crafting unforgettable narrative experiences. With its advanced language capabilities, Poppy expertly immerses users in an interactive and engaging adventure, tailoring each adventure to their individual preferences. + + Update: Vision/multimodal capabilities again!
+ icon: https://cdn-uploads.huggingface.co/production/uploads/642265bc01c62c1e4102dc36/Boje781GkTdYgORTYGI6r.png + tags: + - llm + - multimodal + - gguf + - gpu + - llama3 + - cpu + - llava-1.5 + overrides: + mmproj: Llama-3-Update-2.0-mmproj-model-f16.gguf + parameters: + model: Poppy_Porpoise-1.4-L3-8B.Q4_K_M.gguf + files: + - filename: Poppy_Porpoise-1.4-L3-8B.Q4_K_M.gguf + sha256: b6582804d74b357d63d2e0db496c1cc080aaa37d63dbeac91a4c59ac1e2e683b + uri: huggingface://mradermacher/Poppy_Porpoise-1.4-L3-8B-GGUF/Poppy_Porpoise-1.4-L3-8B.Q4_K_M.gguf + - filename: Llama-3-Update-2.0-mmproj-model-f16.gguf + sha256: 1058494004dfa121439d5a75fb96ea814c7a5937c0529998bf2366f2179bb5ba + uri: huggingface://Nitral-AI/Llama-3-Update-2.0-mmproj-model-f16/Llama-3-Update-2.0-mmproj-model-f16.gguf - !!merge <<: *llama3 name: "bunny-llama-3-8b-v" urls: From 148adebe1695cfe8ceb2eca858f2ec462465b125 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 3 Jun 2024 16:58:53 +0200 Subject: [PATCH 71/80] docs: fix p2p commands (#2472) Also change icons on GPT vision page Signed-off-by: Ettore Di Giacinto --- docs/content/docs/features/distributed_inferencing.md | 4 ++-- docs/content/docs/features/gpt-vision.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/content/docs/features/distributed_inferencing.md b/docs/content/docs/features/distributed_inferencing.md index b3b84528..e7220a81 100644 --- a/docs/content/docs/features/distributed_inferencing.md +++ b/docs/content/docs/features/distributed_inferencing.md @@ -20,7 +20,7 @@ This functionality enables LocalAI to distribute inference requests across multi To start workers for distributing the computational load, run: ```bash -local-ai llamacpp-worker +local-ai worker llama-cpp-rpc ``` Alternatively, you can build the RPC server following the llama.cpp [README](https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md), which is compatible with LocalAI. @@ -71,7 +71,7 @@ To reuse the same token later, restart the server with `--p2ptoken` or `P2P_TOKE 2. Start the workers. 
Copy the `local-ai` binary to other hosts and run as many workers as needed using the token: ```bash -TOKEN=XXX ./local-ai p2p-llama-cpp-rpc +TOKEN=XXX ./local-ai worker p2p-llama-cpp-rpc # 1:06AM INF loading environment variables from file envFile=.env # 1:06AM INF Setting logging to info # {"level":"INFO","time":"2024-05-19T01:06:01.794+0200","caller":"config/config.go:288","message":"connmanager disabled\n"} diff --git a/docs/content/docs/features/gpt-vision.md b/docs/content/docs/features/gpt-vision.md index 9e021273..1fc4307f 100644 --- a/docs/content/docs/features/gpt-vision.md +++ b/docs/content/docs/features/gpt-vision.md @@ -1,7 +1,7 @@ +++ disableToc = false -title = "🆕 GPT Vision" +title = "🥽 GPT Vision" weight = 14 url = "/features/gpt-vision/" +++ From 34527737bb11995914ab08d224f07e4bc67d4be0 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 3 Jun 2024 17:07:26 +0200 Subject: [PATCH 72/80] feat(webui): enhance card visibility (#2473) Do not let the description text clutter the card, and highlight the model names Signed-off-by: Ettore Di Giacinto --- core/http/elements/gallery.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/http/elements/gallery.go b/core/http/elements/gallery.go index 7ca34aef..c37cba31 100644 --- a/core/http/elements/gallery.go +++ b/core/http/elements/gallery.go @@ -243,13 +243,13 @@ func ListModels(models []*gallery.GalleryModel, processing *xsync.SyncedMap[stri }, elem.H5( attrs.Props{ - "class": "mb-2 text-xl font-medium leading-tight", + "class": "mb-2 text-xl font-bold leading-tight", }, elem.Text(m.Name), ), elem.P( attrs.Props{ - "class": "mb-4 text-base", + "class": "mb-4 text-sm [&:not(:hover)]:truncate text-base", }, elem.Text(m.Description), ), From daa7544d9ce3e05a60d45eb2da4c29b03547c9d7 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 3 Jun 2024 19:55:01 +0200 Subject: [PATCH 73/80] Update README.md Signed-off-by: Ettore Di Giacinto --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index efaa685c..ba7617bc 100644 --- a/README.md +++ b/README.md @@ -183,7 +183,7 @@ And a huge shout-out to individuals sponsoring the project by donating hardware LocalAI is a community-driven project created by [Ettore Di Giacinto](https://github.com/mudler/). -MIT - Author Ettore Di Giacinto +MIT - Author Ettore Di Giacinto ## 🙇 Acknowledgements From 6ef78ef7f688583d14141f982bd181b10b7c8bb4 Mon Sep 17 00:00:00 2001 From: fakezeta Date: Mon, 3 Jun 2024 22:41:42 +0200 Subject: [PATCH 74/80] bugfix: CUDA acceleration not working (#2475) * bugfix: CUDA acceleration not working CUDA not working after #2286.
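The root cause: the backend decided whether CUDA was usable from the `request.CUDA` flag instead of probing the runtime, so CPU-only hosts could take the GPU code path and fail. A minimal sketch, assuming torch and transformers are installed, of the detection pattern this fix adopts (illustrative only; device selection in the real backend also honors `request.MainGPU`, as the diff below shows):

```python
import torch

# Probe the runtime instead of trusting a flag from the request:
# torch.cuda.is_available() is False whenever no NVIDIA driver/GPU is
# present, so device placement and quantization can be decided safely
# at model-load time.
cuda_available = torch.cuda.is_available()

if cuda_available:
    # Defer CUDA-only imports until a GPU is known to exist, mirroring
    # how the patch moves BitsAndBytesConfig/AutoModelForCausalLM inside
    # the `if self.CUDA:` branch.
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

device_map = "cuda:0" if cuda_available else "cpu"
print(f"loading model with device_map={device_map}")
```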
Refactored the code to be more polished * Update requirements.txt Missing imports Signed-off-by: fakezeta * Update requirements.txt Signed-off-by: fakezeta --------- Signed-off-by: fakezeta --- backend/python/transformers/backend.py | 13 ++++++------- backend/python/transformers/requirements.txt | 5 ++++- backend/python/transformers/run.sh | 6 ++++++ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/backend/python/transformers/backend.py b/backend/python/transformers/backend.py index 10603d2e..6e809f28 100644 --- a/backend/python/transformers/backend.py +++ b/backend/python/transformers/backend.py @@ -21,10 +21,7 @@ import torch.cuda XPU=os.environ.get("XPU", "0") == "1" -if XPU: - from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria -else: - from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, set_seed, BitsAndBytesConfig, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria +from transformers import AutoTokenizer, AutoModel, set_seed, TextIteratorStreamer, StoppingCriteriaList, StopStringCriteria _ONE_DAY_IN_SECONDS = 60 * 60 * 24 @@ -77,11 +74,11 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): """ model_name = request.Model - compute = "auto" + compute = torch.float16 if request.F16Memory == True: compute=torch.bfloat16 - self.CUDA = request.CUDA + self.CUDA = torch.cuda.is_available() self.OV=False device_map="cpu" @@ -89,6 +86,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): quantization = None if self.CUDA: + from transformers import BitsAndBytesConfig, AutoModelForCausalLM if request.MainGPU: device_map=request.MainGPU else: @@ -107,7 +105,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): bnb_4bit_compute_dtype = None, load_in_8bit=True, ) - + try: if request.Type == "AutoModelForCausalLM": if XPU: @@ -189,6 +187,7 @@ class BackendServicer(backend_pb2_grpc.BackendServicer): device=device_map) self.OV = True else: + print("Automodel", file=sys.stderr) self.model = AutoModel.from_pretrained(model_name, trust_remote_code=request.TrustRemoteCode, use_safetensors=True, diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt index 5f4f4687..494a53fc 100644 --- a/backend/python/transformers/requirements.txt +++ b/backend/python/transformers/requirements.txt @@ -3,4 +3,7 @@ transformers grpcio==1.64.0 protobuf torch -certifi \ No newline at end of file +certifi +intel-extension-for-transformers +bitsandbytes +setuptools==69.5.1 # https://github.com/mudler/LocalAI/issues/2406 diff --git a/backend/python/transformers/run.sh b/backend/python/transformers/run.sh index 375c07e5..8ea92a27 100755 --- a/backend/python/transformers/run.sh +++ b/backend/python/transformers/run.sh @@ -1,4 +1,10 @@ #!/bin/bash source $(dirname $0)/../common/libbackend.sh +if [ -d "/opt/intel" ]; then + # Assumes we are using the Intel oneAPI container image + # https://github.com/intel/intel-extension-for-pytorch/issues/538 + export XPU=1 +fi + startBackend $@ \ No newline at end of file From 67aa31faad7a39989cda5e64dda4dc88156ab122 Mon Sep 17 00:00:00 2001 From: "LocalAI [bot]" <139863280+localai-bot@users.noreply.github.com> Date: Tue, 4 Jun 2024 01:09:24 +0200 Subject: [PATCH 75/80] :arrow_up: Update ggerganov/llama.cpp (#2477) Signed-off-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: mudler <2420543+mudler@users.noreply.github.com> --- Makefile | 2 +- 1 file changed, 1
insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 89a55e6d..f2c03086 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ BINARY_NAME=local-ai # llama.cpp versions GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be -CPPLLAMA_VERSION?=7c4e5b7eae26581869e782015d9deca947c34997 +CPPLLAMA_VERSION?=bde7cd3cd949c1a85d3a199498ac98e78039d46f # gpt4all version GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all From 34ab442ce9ef2ef35e6cae4d5262b0210746d1c8 Mon Sep 17 00:00:00 2001 From: Dave Date: Tue, 4 Jun 2024 02:39:19 -0400 Subject: [PATCH 76/80] toil: bump grpc version (#2480) bump the grpc package version --------- Signed-off-by: Dave Lee --- go.mod | 35 +++++++--------------- go.sum | 92 +++++++++++++--------------------------------------------- 2 files changed, 30 insertions(+), 97 deletions(-) diff --git a/go.mod b/go.mod index c6f24e0c..690be3f1 100644 --- a/go.mod +++ b/go.mod @@ -8,10 +8,8 @@ require ( github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf github.com/Masterminds/sprig/v3 v3.2.3 github.com/alecthomas/kong v0.9.0 - github.com/census-instrumentation/opencensus-proto v0.4.1 github.com/charmbracelet/glamour v0.7.0 github.com/chasefleming/elem-go v0.25.0 - github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 github.com/donomii/go-rwkv.cpp v0.0.0-20240228065144-661e7ae26d44 github.com/elliotchance/orderedmap/v2 v2.2.0 github.com/fsnotify/fsnotify v1.7.0 @@ -22,8 +20,7 @@ require ( github.com/gofiber/fiber/v2 v2.52.4 github.com/gofiber/swagger v1.0.0 github.com/gofiber/template/html/v2 v2.1.1 - github.com/google/uuid v1.5.0 - github.com/grpc-ecosystem/grpc-gateway v1.16.0 + github.com/google/uuid v1.6.0 github.com/hpcloud/tail v1.0.0 github.com/imdario/mergo v0.3.16 github.com/ipfs/go-log v1.0.5 @@ -56,16 +53,13 @@ require ( go.opentelemetry.io/otel/exporters/prometheus v0.42.0 go.opentelemetry.io/otel/metric v1.19.0 go.opentelemetry.io/otel/sdk/metric v1.19.0 - google.golang.org/api v0.126.0 - google.golang.org/grpc v1.59.0 - google.golang.org/protobuf v1.33.0 + google.golang.org/grpc v1.64.0 + google.golang.org/protobuf v1.34.1 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 ) require ( - cloud.google.com/go/compute v1.23.0 // indirect - cloud.google.com/go/compute/metadata v0.2.3 // indirect github.com/benbjohnson/clock v1.3.5 // indirect github.com/c-robinson/iplib v1.0.8 // indirect github.com/containerd/cgroups v1.1.0 // indirect @@ -74,17 +68,12 @@ require ( github.com/davidlazar/go-crypto v0.0.0-20200604182044-b73af7476f6c // indirect github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 // indirect github.com/elastic/gosigar v0.14.2 // indirect - github.com/envoyproxy/protoc-gen-validate v1.0.2 // indirect github.com/flynn/noise v1.0.0 // indirect github.com/francoispqt/gojay v1.2.13 // indirect github.com/godbus/dbus/v5 v5.1.0 // indirect - github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/mock v1.6.0 // indirect github.com/google/btree v1.1.2 // indirect github.com/google/gopacket v1.1.19 // indirect - github.com/google/s2a-go v0.1.4 // indirect - github.com/googleapis/enterprise-certificate-proxy v0.2.3 // indirect - github.com/googleapis/gax-go/v2 v2.11.0 // indirect github.com/gorilla/websocket v1.5.0 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect @@ -153,16 +142,12 @@ require ( go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 // 
indirect - golang.org/x/oauth2 v0.11.0 // indirect golang.org/x/sync v0.6.0 // indirect - golang.org/x/sys v0.19.0 // indirect + golang.org/x/sys v0.20.0 // indirect golang.zx2c4.com/wintun v0.0.0-20211104114900-415007cec224 // indirect golang.zx2c4.com/wireguard v0.0.0-20220703234212-c31a7b1ab478 // indirect golang.zx2c4.com/wireguard/windows v0.5.3 // indirect gonum.org/v1/gonum v0.13.0 // indirect - google.golang.org/appengine v1.6.7 // indirect - google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d // indirect lukechampine.com/blake3 v1.2.1 // indirect ) @@ -204,7 +189,7 @@ require ( github.com/gofiber/template v1.8.3 // indirect github.com/gofiber/utils v1.1.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect - github.com/golang/protobuf v1.5.3 + github.com/golang/protobuf v1.5.4 // indirect github.com/golang/snappy v0.0.2 // indirect github.com/google/go-cmp v0.6.0 // indirect github.com/google/pprof v0.0.0-20230821062121-407c9e7a662f // indirect @@ -264,13 +249,13 @@ require ( github.com/yusufpapurcu/wmi v1.2.3 // indirect go.opentelemetry.io/otel/sdk v1.19.0 // indirect go.opentelemetry.io/otel/trace v1.19.0 // indirect - golang.org/x/crypto v0.22.0 // indirect + golang.org/x/crypto v0.23.0 // indirect golang.org/x/mod v0.16.0 // indirect - golang.org/x/net v0.24.0 // indirect - golang.org/x/term v0.19.0 // indirect - golang.org/x/text v0.14.0 // indirect + golang.org/x/net v0.25.0 // indirect + golang.org/x/term v0.20.0 // indirect + golang.org/x/text v0.15.0 // indirect golang.org/x/tools v0.19.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 // indirect gopkg.in/fsnotify.v1 v1.4.7 // indirect gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect howett.net/plist v1.0.0 // indirect diff --git a/go.sum b/go.sum index 50585d21..4bfcb14c 100644 --- a/go.sum +++ b/go.sum @@ -2,10 +2,6 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT cloud.google.com/go v0.31.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.37.0/go.mod h1:TS1dMSSfndXH133OKGwekG838Om/cQT0BUHV3HcBgoo= -cloud.google.com/go/compute v1.23.0 h1:tP41Zoavr8ptEqaW6j+LQOnyBBhO7OkOMAGrgLopTwY= -cloud.google.com/go/compute v1.23.0/go.mod h1:4tCnrn48xsqlwSAiLf1HXMQk8CONslYbdiEZc9FEIbM= -cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= -cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA= dmitri.shuralyov.com/app/changes v0.0.0-20180602232624-0a106ad413e3/go.mod h1:Yl+fi1br7+Rr3LqpNJf1/uxUdtRUV+Tnj0o93V2B9MU= dmitri.shuralyov.com/html/belt v0.0.0-20180602232347-f7d459c86be0/go.mod h1:JLBrvjyP0v+ecvNYvCpyZgu5/xkfAUhi6wJj28eUfSU= dmitri.shuralyov.com/service/change v0.0.0-20181023043359-a85b471d5412/go.mod h1:a1inKt/atXimZ4Mv927x+r7UpyzRUf4emIoiiSC2TN4= @@ -42,7 +38,6 @@ github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c= 
-github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk= @@ -61,9 +56,6 @@ github.com/c-robinson/iplib v1.0.8/go.mod h1:i3LuuFL1hRT5gFpBRnEydzw8R6yhGkF4szN github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMrBo8f1j86j5WHzznCCQxV/b8g= -github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw= -github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/charmbracelet/glamour v0.7.0 h1:2BtKGZ4iVJCDfMF229EzbeR1QRKLWztO9dMtjmqZSng= @@ -73,13 +65,6 @@ github.com/chasefleming/elem-go v0.25.0/go.mod h1:hz73qILBIKnTgOujnSMtEj20/epI+f github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= -github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= -github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 h1:/inchEIKaYC1Akx+H+gqO04wryn5h75LSazbRlnya1k= -github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/containerd/cgroups v0.0.0-20201119153540-4cbc285b3327/go.mod h1:ZJeTFisyysqgcCdecO57Dj79RfL0LNeGiFUqLYQRYLE= github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaDFBM= github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw= @@ -131,11 +116,7 @@ github.com/elliotchance/orderedmap/v2 v2.2.0/go.mod h1:85lZyVbpGaGvHvnKa7Qhx7znc github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= -github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= -github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0= github.com/envoyproxy/protoc-gen-validate 
v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/envoyproxy/protoc-gen-validate v1.0.2 h1:QkIBuU5k+x7/QXPvPPnWXWlCdaBFApVqftFV6k087DA= -github.com/envoyproxy/protoc-gen-validate v1.0.2/go.mod h1:GpiZQP3dDbg4JouG/NNS7QWXpgx6x8QiMKdmN72jogE= github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc= github.com/flynn/noise v1.0.0 h1:DlTHqmzmvcEiKj+4RYo/imoswx/4r6iBlCMfVtrMXpQ= github.com/flynn/noise v1.0.0/go.mod h1:xbMo+0i6+IGbYdJhF31t2eR1BIU0CYc12+BNAKwUTag= @@ -217,7 +198,6 @@ github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+Licev github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= @@ -228,8 +208,8 @@ github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golang/snappy v0.0.2 h1:aeE13tS0IiQgFjYdoL8qN3K1N2bXXtI6Vi51/y7BpMw= github.com/golang/snappy v0.0.2/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= @@ -256,20 +236,14 @@ github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OI github.com/google/pprof v0.0.0-20230821062121-407c9e7a662f h1:pDhu5sgp8yJlEF/g6osliIIpF9K4F5jvkULXa4daRDQ= github.com/google/pprof v0.0.0-20230821062121-407c9e7a662f/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= -github.com/google/s2a-go v0.1.4 h1:1kZ/sQM3srePvKs3tXAvQzo66XfcReoqFpIpIccE7Oc= -github.com/google/s2a-go v0.1.4/go.mod h1:Ej+mSEMGRnqRzjc7VtF+jdBwYG5fuJfiZ8ELkjEwM0A= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= -github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/googleapis/enterprise-certificate-proxy v0.2.3 h1:yk9/cqRKtT9wXZSsRH9aurXEpJX+U6FLtpYTdC3R06k= 
-github.com/googleapis/enterprise-certificate-proxy v0.2.3/go.mod h1:AwSRAtLfXpU5Nm3pW+v7rGDHp09LsPtGY9MduiEsR9k= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go v2.0.0+incompatible/go.mod h1:SFVmujtThgffbyetf+mdk2eWhX2bMyUtNHzFKcPA9HY= github.com/googleapis/gax-go/v2 v2.0.3/go.mod h1:LLvjysVCY1JZeum8Z6l8qUty8fiNwE08qbEPm1M08qg= -github.com/googleapis/gax-go/v2 v2.11.0 h1:9V9PWXEsWnPpQhu/PeQIkS4eGzMlTLGgt80cUUI8Ki4= -github.com/googleapis/gax-go/v2 v2.11.0/go.mod h1:DxmR61SGKkGLa2xigwuZIQpkCI2S5iydzRfb3peWZJI= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c h1:7lF+Vz0LqiRidnzC1Oq86fpX1q/iEv2KJdrCtttYjT4= github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= @@ -279,8 +253,6 @@ github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWm github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= github.com/grpc-ecosystem/grpc-gateway v1.5.0/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw= -github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= -github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= @@ -587,7 +559,6 @@ github.com/raulk/go-watchdog v1.3.0/go.mod h1:fIvOnLbF0b0ZwkB9YU4mOW9Did//4vPZtD github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= -github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= @@ -738,7 +709,6 @@ go.opentelemetry.io/otel/sdk/metric v1.19.0 h1:EJoTO5qysMsYCa+w4UghwFV/ptQgqSL/8 go.opentelemetry.io/otel/sdk/metric v1.19.0/go.mod h1:XjG0jQyFJrv2PbMvwND7LwCEhsJzCzV5210euduKcKY= go.opentelemetry.io/otel/trace v1.19.0 h1:DFVQmlVbfVeOuBRrwdtaehRrWiL1JoVs9CPIQ1Dzxpg= go.opentelemetry.io/otel/trace v1.19.0/go.mod h1:mfaSyvGyEJEI0nyV2I4qhNQnbBOUUmYZpYojqMnX2vo= -go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= @@ -771,10 +741,9 @@ golang.org/x/crypto v0.0.0-20200602180216-279210d13fed/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto 
v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.0.0-20220314234659-1baeb1ce4c0b/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4= -golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30= -golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= +golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ= golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8= @@ -802,11 +771,9 @@ golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190313220215-9f648a60d977/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= @@ -814,18 +781,14 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20210423184538-5f58ad60dda6/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= -golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= -golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= -golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20181017192945-9dcd33a902f4/go.mod 
h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20181203162652-d668ce993890/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.11.0 h1:vPL4xzxBM4niKCW6g9whtaWVXTJf1U5e4aZxxFx/gbU= -golang.org/x/oauth2 v0.11.0/go.mod h1:LdF7O/8bLR/qWK9DrpXmbHLTouvRHK0SgJl0GmDBchk= golang.org/x/perf v0.0.0-20180704124530-6e6d33e29852/go.mod h1:JLpeXjPJfIyPr5TlbXLkXWLhP8nz10XfvxElABhCtcw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -877,24 +840,22 @@ golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= -golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= -golang.org/x/term v0.19.0 h1:+ThwsDv+tYfnJFhF4L8jITxu1tdTWRTZpdsWgEgjL6Q= -golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk= +golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -935,28 +896,19 @@ 
gonum.org/v1/gonum v0.13.0/go.mod h1:/WPYRckkfWrhWefxyYTfrTtQR0KH4iyHNuzxqXAKyAU google.golang.org/api v0.0.0-20180910000450-7ca32eb868bf/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= google.golang.org/api v0.0.0-20181030000543-1d582fd0359e/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= google.golang.org/api v0.1.0/go.mod h1:UGEZY7KEX120AnNLIHFMKIo4obdJhkp2tPbaPlQx13Y= -google.golang.org/api v0.126.0 h1:q4GJq+cAdMAC7XP7njvQ4tvohGLiSlytuL4BQxbIZ+o= -google.golang.org/api v0.126.0/go.mod h1:mBwVAtz+87bEN6CbA1GtZPDOqY2R5ONPqJeIlvyo4Aw= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.3.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= -google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20180831171423-11092d34479b/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20181029155118-b69ba1387ce2/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20181202183823-bd91e49a0898/go.mod h1:7Ep/1NZk928CDR8SjdVbjWNpdIf6nzjE3BTgJDr2Atg= google.golang.org/genproto v0.0.0-20190306203927-b5d61aea6440/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d h1:VBu5YqKPv6XiJ199exd8Br+Aetz+o08F+PLMnwJQHAY= -google.golang.org/genproto v0.0.0-20230822172742-b8732ec3820d/go.mod h1:yZTlhN0tQnXo3h00fuXNCxJdLdIdnVFVBaRJ5LWBbw4= -google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d h1:DoPTO70H+bcDXcd39vOqb2viZxgqeBeSGtZ55yZU4/Q= -google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d/go.mod h1:KjSP20unUpOx5kyQUFa7k4OJg0qeJ7DEZflGDu2p6Bk= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d h1:uvYuEyMHKNt+lT4K3bN6fGswmK8qSvcreM3BwjDh+y4= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 h1:Zy9XzmMEflZ/MAaA7vNcoebnRAld7FsPW1EeBB7V0m8= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157/go.mod h1:EfXuqaE1J41VCDicxHzUDm+8rk+7ZdXzHV0IhO/I6s0= google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.16.0/go.mod h1:0JHn/cJsOMiMfNA9+DeHDlAU7KAAB5GDlYFpa9MZMio= google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= @@ -964,12 +916,9 @@ google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZi google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod 
h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= -google.golang.org/grpc v1.45.0/go.mod h1:lN7owxKUQEqMfSyQikvvk5tf/6zMPsrK+ONuO11+0rQ= -google.golang.org/grpc v1.59.0 h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk= -google.golang.org/grpc v1.59.0/go.mod h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98= +google.golang.org/grpc v1.64.0 h1:KH3VH9y/MgNQg1dE7b3XfVK0GsPSIzJwdF617gUSbvY= +google.golang.org/grpc v1.64.0/go.mod h1:oxjF8E3FBnjp+/gVFYdWacaLDx9na1aqy9oovLpxQYg= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -981,8 +930,8 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= +google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= @@ -997,7 +946,6 @@ gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWD gopkg.in/yaml.v1 v1.0.0-20140924161607-9f9df34309c0/go.mod h1:WDnlLJ4WF5VGsH/HVa3CI79GS0ol3YnhVnKP89i0kNg= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= From 1ffee9989f0d9c4dfd40a62c19d21d7c4549f1fc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 4 Jun 2024 15:23:00 +0200 Subject: [PATCH 77/80] README: update sponsors list (#2476) Signed-off-by: Ettore Di Giacinto --- README.md | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index ba7617bc..d6150c57 100644 --- a/README.md +++ b/README.md @@ -163,17 +163,16 @@ If you utilize this repository, data in a downstream project, please consider ci Support the project by becoming [a backer or sponsor](https://github.com/sponsors/mudler). 
Your logo will show up here with a link to your website. -A huge thank you to our generous sponsors who support this project: +A huge thank you to our generous sponsors who support this project covering CI expenses, and our [Sponsor list](https://github.com/sponsors/mudler): -| ![Spectro Cloud logo_600x600px_transparent bg](https://github.com/go-skynet/LocalAI/assets/2420543/68a6f3cb-8a65-4a4d-99b5-6417a8905512) | -|:-----------------------------------------------:| -| [Spectro Cloud](https://www.spectrocloud.com/) | -| Spectro Cloud kindly supports LocalAI by providing GPU and computing resources to run tests on lamdalabs! | - -And a huge shout-out to individuals sponsoring the project by donating hardware or backing the project. - -- [Sponsor list](https://github.com/sponsors/mudler) -- JDAM00 (donating HW for the CI) +

+  <!-- sponsor logo links: one <a href="..."><img ...></a> entry per sponsor (the tag markup is not recoverable here) -->
+</p>

## 🌟 Star history From bdd6769b2dcbb8f43cd9f51a53f7f8d05ffc83f3 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Tue, 4 Jun 2024 15:23:29 +0200 Subject: [PATCH 78/80] feat(default): use number of physical cores as default (#2483) Signed-off-by: Ettore Di Giacinto --- core/cli/run.go | 2 +- core/config/application_config.go | 5 ++++- pkg/xsysinfo/cpu.go | 7 +++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/core/cli/run.go b/core/cli/run.go index 6c41f63b..009f5315 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -37,7 +37,7 @@ type RunCMD struct { PreloadModelsConfig string `env:"LOCALAI_PRELOAD_MODELS_CONFIG,PRELOAD_MODELS_CONFIG" help:"A List of models to apply at startup. Path to a YAML config file" group:"models"` F16 bool `name:"f16" env:"LOCALAI_F16,F16" help:"Enable GPU acceleration" group:"performance"` - Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" default:"4" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"` + Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"` ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" default:"512" help:"Default context size for models" group:"performance"` Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` diff --git a/core/config/application_config.go b/core/config/application_config.go index 398418ad..a71b6223 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -7,6 +7,7 @@ import ( "time" "github.com/go-skynet/LocalAI/pkg/gallery" + "github.com/go-skynet/LocalAI/pkg/xsysinfo" "github.com/rs/zerolog/log" ) @@ -59,7 +60,6 @@ func NewApplicationConfig(o ...AppOption) *ApplicationConfig { opt := &ApplicationConfig{ Context: context.Background(), UploadLimitMB: 15, - Threads: 1, ContextSize: 512, Debug: true, } @@ -213,6 +213,9 @@ func WithUploadLimitMB(limit int) AppOption { func WithThreads(threads int) AppOption { return func(o *ApplicationConfig) { + if threads == 0 { // 0 is not allowed + threads = xsysinfo.CPUPhysicalCores() + } o.Threads = threads } } diff --git a/pkg/xsysinfo/cpu.go b/pkg/xsysinfo/cpu.go index e3066b56..b1ff20fe 100644 --- a/pkg/xsysinfo/cpu.go +++ b/pkg/xsysinfo/cpu.go @@ -36,3 +36,10 @@ func CPUCapabilities() ([]string, error) { func HasCPUCaps(ids ...cpuid.FeatureID) bool { return cpuid.CPU.Supports(ids...) 
} + +func CPUPhysicalCores() int { + if cpuid.CPU.PhysicalCores == 0 { + return 1 + } + return cpuid.CPU.PhysicalCores +} From 2fc6fe806b903ac0a70218b21b5c84443a1b0866 Mon Sep 17 00:00:00 2001 From: Dave Date: Tue, 4 Jun 2024 10:32:47 -0400 Subject: [PATCH 79/80] fix: `pkg/downloader` should respect basePath for `file://` urls (#2481) * pass basePath down to pkg/downloader Signed-off-by: Dave Lee * enforce Signed-off-by: Dave Lee --------- Signed-off-by: Dave Lee --- core/http/app_test.go | 3 ++- core/services/gallery.go | 2 +- embedded/embedded.go | 4 ++-- pkg/downloader/uri.go | 7 ++++++- pkg/downloader/uri_test.go | 6 +++--- pkg/gallery/gallery.go | 10 +++++----- pkg/gallery/models.go | 4 ++-- pkg/gallery/request_test.go | 2 +- pkg/startup/model_preload.go | 2 +- 9 files changed, 23 insertions(+), 17 deletions(-) diff --git a/core/http/app_test.go b/core/http/app_test.go index 5776b99a..6e9de246 100644 --- a/core/http/app_test.go +++ b/core/http/app_test.go @@ -73,7 +73,8 @@ func getModelStatus(url string) (response map[string]interface{}) { } func getModels(url string) (response []gallery.GalleryModel) { - downloader.GetURI(url, func(url string, i []byte) error { + // TODO: No tests currently seem to exercise file:// urls. Fix? + downloader.GetURI(url, "", func(url string, i []byte) error { // Unmarshal YAML data into a struct return json.Unmarshal(i, &response) }) diff --git a/core/services/gallery.go b/core/services/gallery.go index ed6f6165..e20e733a 100644 --- a/core/services/gallery.go +++ b/core/services/gallery.go @@ -32,7 +32,7 @@ func NewGalleryService(modelPath string) *GalleryService { func prepareModel(modelPath string, req gallery.GalleryModel, cl *config.BackendConfigLoader, downloadStatus func(string, string, string, float64)) error { - config, err := gallery.GetGalleryConfigFromURL(req.URL) + config, err := gallery.GetGalleryConfigFromURL(req.URL, modelPath) if err != nil { return err } diff --git a/embedded/embedded.go b/embedded/embedded.go index 438a1352..1fc59b4d 100644 --- a/embedded/embedded.go +++ b/embedded/embedded.go @@ -36,10 +36,10 @@ func init() { } } -func GetRemoteLibraryShorteners(url string) (map[string]string, error) { +func GetRemoteLibraryShorteners(url string, basePath string) (map[string]string, error) { remoteLibrary := map[string]string{} - err := downloader.GetURI(url, func(_ string, i []byte) error { + err := downloader.GetURI(url, basePath, func(_ string, i []byte) error { return yaml.Unmarshal(i, &remoteLibrary) }) if err != nil { diff --git a/pkg/downloader/uri.go b/pkg/downloader/uri.go index 797a264b..0848a238 100644 --- a/pkg/downloader/uri.go +++ b/pkg/downloader/uri.go @@ -23,7 +23,7 @@ const ( GithubURI2 = "github://" ) -func GetURI(url string, f func(url string, i []byte) error) error { +func GetURI(url string, basePath string, f func(url string, i []byte) error) error { url = ConvertURL(url) if strings.HasPrefix(url, "file://") { @@ -33,6 +33,11 @@ func GetURI(url string, f func(url string, i []byte) error) error { if err != nil { return err } + // Check if the local file is rooted in basePath + err = utils.VerifyPath(resolvedFile, basePath) + if err != nil { + return err + } // Read the response body body, err := os.ReadFile(resolvedFile) if err != nil { diff --git a/pkg/downloader/uri_test.go b/pkg/downloader/uri_test.go index cd17b7ca..3ab04e56 100644 --- a/pkg/downloader/uri_test.go +++ b/pkg/downloader/uri_test.go @@ -10,7 +10,7 @@ var _ = Describe("Gallery API tests", func() { Context("URI", func() { It("parses github with a 
branch", func() { Expect( - GetURI("github:go-skynet/model-gallery/gpt4all-j.yaml", func(url string, i []byte) error { + GetURI("github:go-skynet/model-gallery/gpt4all-j.yaml", "", func(url string, i []byte) error { Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml")) return nil }), @@ -18,7 +18,7 @@ var _ = Describe("Gallery API tests", func() { }) It("parses github without a branch", func() { Expect( - GetURI("github:go-skynet/model-gallery/gpt4all-j.yaml@main", func(url string, i []byte) error { + GetURI("github:go-skynet/model-gallery/gpt4all-j.yaml@main", "", func(url string, i []byte) error { Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml")) return nil }), @@ -26,7 +26,7 @@ var _ = Describe("Gallery API tests", func() { }) It("parses github with urls", func() { Expect( - GetURI("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml", func(url string, i []byte) error { + GetURI("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml", "", func(url string, i []byte) error { Expect(url).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/gpt4all-j.yaml")) return nil }), diff --git a/pkg/gallery/gallery.go b/pkg/gallery/gallery.go index 6202529a..0e9daa79 100644 --- a/pkg/gallery/gallery.go +++ b/pkg/gallery/gallery.go @@ -27,7 +27,7 @@ func InstallModelFromGallery(galleries []Gallery, name string, basePath string, if len(model.URL) > 0 { var err error - config, err = GetGalleryConfigFromURL(model.URL) + config, err = GetGalleryConfigFromURL(model.URL, basePath) if err != nil { return err } @@ -142,9 +142,9 @@ func AvailableGalleryModels(galleries []Gallery, basePath string) ([]*GalleryMod return models, nil } -func findGalleryURLFromReferenceURL(url string) (string, error) { +func findGalleryURLFromReferenceURL(url string, basePath string) (string, error) { var refFile string - err := downloader.GetURI(url, func(url string, d []byte) error { + err := downloader.GetURI(url, basePath, func(url string, d []byte) error { refFile = string(d) if len(refFile) == 0 { return fmt.Errorf("invalid reference file at url %s: %s", url, d) @@ -161,13 +161,13 @@ func getGalleryModels(gallery Gallery, basePath string) ([]*GalleryModel, error) if strings.HasSuffix(gallery.URL, ".ref") { var err error - gallery.URL, err = findGalleryURLFromReferenceURL(gallery.URL) + gallery.URL, err = findGalleryURLFromReferenceURL(gallery.URL, basePath) if err != nil { return models, err } } - err := downloader.GetURI(gallery.URL, func(url string, d []byte) error { + err := downloader.GetURI(gallery.URL, basePath, func(url string, d []byte) error { return yaml.Unmarshal(d, &models) }) if err != nil { diff --git a/pkg/gallery/models.go b/pkg/gallery/models.go index e697fcd6..225097c0 100644 --- a/pkg/gallery/models.go +++ b/pkg/gallery/models.go @@ -63,9 +63,9 @@ type PromptTemplate struct { Content string `yaml:"content"` } -func GetGalleryConfigFromURL(url string) (Config, error) { +func GetGalleryConfigFromURL(url string, basePath string) (Config, error) { var config Config - err := downloader.GetURI(url, func(url string, d []byte) error { + err := downloader.GetURI(url, basePath, func(url string, d []byte) error { return yaml.Unmarshal(d, &config) }) if err != nil { diff --git a/pkg/gallery/request_test.go b/pkg/gallery/request_test.go index a9d54e32..af085e81 100644 --- a/pkg/gallery/request_test.go +++ b/pkg/gallery/request_test.go @@ -10,7 +10,7 
@@ var _ = Describe("Gallery API tests", func() { Context("requests", func() { It("parses github with a branch", func() { req := GalleryModel{URL: "github:go-skynet/model-gallery/gpt4all-j.yaml@main"} - e, err := GetGalleryConfigFromURL(req.URL) + e, err := GetGalleryConfigFromURL(req.URL, "") Expect(err).ToNot(HaveOccurred()) Expect(e.Name).To(Equal("gpt4all-j")) }) diff --git a/pkg/startup/model_preload.go b/pkg/startup/model_preload.go index d267d846..240fc6bd 100644 --- a/pkg/startup/model_preload.go +++ b/pkg/startup/model_preload.go @@ -20,7 +20,7 @@ func PreloadModelsConfigurations(modelLibraryURL string, modelPath string, model // As a best effort, try to resolve the model from the remote library // if it's not resolved we try with the other method below if modelLibraryURL != "" { - lib, err := embedded.GetRemoteLibraryShorteners(modelLibraryURL) + lib, err := embedded.GetRemoteLibraryShorteners(modelLibraryURL, modelPath) if err == nil { if lib[url] != "" { log.Debug().Msgf("[startup] model configuration is defined remotely: %s (%s)", url, lib[url]) From 4e1463fec291612a59a16db60b3fd12d4c49d64b Mon Sep 17 00:00:00 2001 From: Dave Date: Tue, 4 Jun 2024 15:43:46 -0400 Subject: [PATCH 80/80] feat: fiber CSRF (#2482) new config option - enables or disables the fiber csrf middleware Signed-off-by: Dave Lee --- core/cli/run.go | 2 ++ core/config/application_config.go | 7 +++++++ core/http/app.go | 6 ++++++ go.mod | 2 ++ go.sum | 11 +++++++++++ 5 files changed, 28 insertions(+) diff --git a/core/cli/run.go b/core/cli/run.go index 009f5315..17fb79be 100644 --- a/core/cli/run.go +++ b/core/cli/run.go @@ -43,6 +43,7 @@ type RunCMD struct { Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"` CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"` CORSAllowOrigins string `env:"LOCALAI_CORS_ALLOW_ORIGINS,CORS_ALLOW_ORIGINS" group:"api"` + CSRF bool `env:"LOCALAI_CSRF" help:"Enables fiber CSRF middleware" group:"api"` UploadLimit int `env:"LOCALAI_UPLOAD_LIMIT,UPLOAD_LIMIT" default:"15" help:"Default upload-limit in MB" group:"api"` APIKeys []string `env:"LOCALAI_API_KEY,API_KEY" help:"List of API Keys to enable API authentication. 
When this is set, all the requests must be authenticated with one of these API keys" group:"api"` DisableWebUI bool `env:"LOCALAI_DISABLE_WEBUI,DISABLE_WEBUI" default:"false" help:"Disable webui" group:"api"` @@ -77,6 +78,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error { config.WithModelLibraryURL(r.RemoteLibrary), config.WithCors(r.CORS), config.WithCorsAllowOrigins(r.CORSAllowOrigins), + config.WithCsrf(r.CSRF), config.WithThreads(r.Threads), config.WithBackendAssets(ctx.BackendAssets), config.WithBackendAssetsOutput(r.BackendAssetsPath), diff --git a/core/config/application_config.go b/core/config/application_config.go index a71b6223..9f563842 100644 --- a/core/config/application_config.go +++ b/core/config/application_config.go @@ -26,6 +26,7 @@ type ApplicationConfig struct { DynamicConfigsDir string DynamicConfigsDirPollInterval time.Duration CORS bool + CSRF bool PreloadJSONModels string PreloadModelsFromPath string CORSAllowOrigins string @@ -87,6 +88,12 @@ func WithCors(b bool) AppOption { } } +func WithCsrf(b bool) AppOption { + return func(o *ApplicationConfig) { + o.CSRF = b + } +} + func WithModelLibraryURL(url string) AppOption { return func(o *ApplicationConfig) { o.ModelLibraryURL = url diff --git a/core/http/app.go b/core/http/app.go index de31346b..1ffd6b45 100644 --- a/core/http/app.go +++ b/core/http/app.go @@ -20,6 +20,7 @@ import ( "github.com/gofiber/contrib/fiberzerolog" "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/cors" + "github.com/gofiber/fiber/v2/middleware/csrf" "github.com/gofiber/fiber/v2/middleware/favicon" "github.com/gofiber/fiber/v2/middleware/filesystem" "github.com/gofiber/fiber/v2/middleware/recover" @@ -167,6 +168,11 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi app.Use(c) } + if appConfig.CSRF { + log.Debug().Msg("Enabling CSRF middleware. 
Tokens are now required for state-modifying requests") + app.Use(csrf.New()) + } + // Load config jsons utils.LoadConfig(appConfig.UploadDir, openai.UploadedFilesFile, &openai.UploadedFiles) utils.LoadConfig(appConfig.ConfigsDir, openai.AssistantsConfigFile, &openai.Assistants) diff --git a/go.mod b/go.mod index 690be3f1..393608d5 100644 --- a/go.mod +++ b/go.mod @@ -125,6 +125,7 @@ require ( github.com/opentracing/opentracing-go v1.2.0 // indirect github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect github.com/peterbourgon/diskv v2.0.1+incompatible // indirect + github.com/philhofer/fwd v1.1.2 // indirect github.com/polydawn/refmt v0.89.0 // indirect github.com/quic-go/qpack v0.4.0 // indirect github.com/quic-go/qtls-go1-20 v0.3.3 // indirect @@ -133,6 +134,7 @@ require ( github.com/raulk/go-watchdog v1.3.0 // indirect github.com/songgao/packets v0.0.0-20160404182456-549a10cd4091 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect + github.com/tinylib/msgp v1.1.8 // indirect github.com/vishvananda/netlink v1.1.0 // indirect github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74 // indirect github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1 // indirect diff --git a/go.sum b/go.sum index 4bfcb14c..792b9175 100644 --- a/go.sum +++ b/go.sum @@ -520,6 +520,8 @@ github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+v github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5 h1:Ii+DKncOVM8Cu1Hc+ETb5K+23HdAMvESYE3ZJ5b5cMI= github.com/phayes/freeport v0.0.0-20220201140144-74d24b5ae9f5/go.mod h1:iIss55rKnNBTvrwdmkUpLnDpZoAHvWaiq5+iMmen4AE= +github.com/philhofer/fwd v1.1.2 h1:bnDivRJ1EWPjUIRXV5KfORO897HTbpFAQddBdE8t7Gw= +github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0= github.com/pierrec/lz4/v4 v4.1.2 h1:qvY3YFXRQE/XB8MlLzJH7mSzBs74eA2gg52YTk6jUPM= github.com/pierrec/lz4/v4 v4.1.2/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -645,6 +647,8 @@ github.com/swaggo/files/v2 v2.0.0/go.mod h1:24kk2Y9NYEJ5lHuCra6iVwkMjIekMCaFq/0J github.com/swaggo/swag v1.16.3 h1:PnCYjPCah8FK4I26l2F/KQ4yz3sILcVUN3cTlBFA9Pg= github.com/swaggo/swag v1.16.3/go.mod h1:DImHIuOFXKpMFAQjcC7FG4m3Dg4+QuUgUzJmKjI/gRk= github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA= +github.com/tinylib/msgp v1.1.8 h1:FCXC1xanKO4I8plpHGH2P7koL/RzZs12l/+r7vakfm0= +github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw= github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI= github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU= github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI= @@ -759,6 +763,7 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.16.0 h1:QX4fJ0Rr5cPQCF7O9lh9Se4pmwfwskqZfq5moyldzic= golang.org/x/mod v0.16.0/go.mod 
h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -783,6 +788,7 @@ golang.org/x/net v0.0.0-20210423184538-5f58ad60dda6/go.mod h1:OJAsFXCWl8Ukc7SiCT golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= +golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE= golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -799,6 +805,7 @@ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180810173357-98c5dad5d1a0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -834,6 +841,7 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -845,6 +853,7 @@ golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= +golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw= golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= @@ -854,6 +863,7 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.15.0 
h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -879,6 +889,7 @@ golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4f golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ= golang.org/x/tools v0.19.0 h1:tfGCXNR1OsFG+sVdLAitlpjAvD/I6dHDKnYrpEZUHkw= golang.org/x/tools v0.19.0/go.mod h1:qoJWxmGSIBmAeriMx19ogtrEPrGtDbPK634QFIcLAhc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
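
Note on PATCH 79/80: the diff guards `file://` URLs with a call to `utils.VerifyPath(resolvedFile, basePath)`, but the body of that helper is not part of these patches. Below is a minimal, self-contained sketch of the kind of base-path containment check such a helper typically performs. The `verifyPath` function is hypothetical and written for illustration only; it is not LocalAI's actual `utils.VerifyPath`.

package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

// verifyPath returns an error unless path, once cleaned and made
// absolute, still resolves inside basePath. This blocks traversal
// attempts such as file://../../etc/passwd.
func verifyPath(path, basePath string) error {
	if basePath == "" {
		// Assumption for this sketch only: an empty base path disables
		// the check (the tests above pass "" for remote URLs). The real
		// utils.VerifyPath behaviour for this case is not shown in the diff.
		return nil
	}
	absBase, err := filepath.Abs(basePath)
	if err != nil {
		return err
	}
	absPath, err := filepath.Abs(path)
	if err != nil {
		return err
	}
	rel, err := filepath.Rel(absBase, absPath)
	if err != nil {
		return err
	}
	if rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return fmt.Errorf("path %q escapes base path %q", path, basePath)
	}
	return nil
}

func main() {
	fmt.Println(verifyPath("/models/gpt4all-j.yaml", "/models")) // <nil>
	fmt.Println(verifyPath("/models/../etc/passwd", "/models"))  // error
}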
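
Note on PATCH 80/80: `csrf.New()` is mounted with its defaults. As a rough usage sketch, the snippet below spells out a default-equivalent Fiber v2 configuration so the behaviour is visible. The field values mirror the middleware's documented defaults (token read from the `X-Csrf-Token` header, backed by a `csrf_` cookie, one-hour expiration), and the route is illustrative only.

package main

import (
	"log"
	"time"

	"github.com/gofiber/fiber/v2"
	"github.com/gofiber/fiber/v2/middleware/csrf"
)

func main() {
	app := fiber.New()

	// Roughly equivalent to the bare csrf.New() used in core/http/app.go:
	// safe requests (GET, HEAD, ...) receive a token cookie, and
	// state-modifying requests must echo the token back in the
	// X-Csrf-Token header or they are rejected.
	app.Use(csrf.New(csrf.Config{
		KeyLookup:  "header:X-Csrf-Token",
		CookieName: "csrf_",
		Expiration: 1 * time.Hour,
	}))

	app.Post("/echo", func(c *fiber.Ctx) error {
		return c.SendString("ok")
	})

	log.Fatal(app.Listen(":8080"))
}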