mirror of https://github.com/mudler/LocalAI.git
synced 2024-06-07 19:40:48 +00:00

feat: add bert.cpp embeddings (#222)

This commit is contained in:
parent e6db14e2f1
commit f8ee20991c
.github/workflows/bump_deps.yaml (vendored): 3 additions

@@ -24,6 +24,9 @@ jobs:
   - repository: "ggerganov/whisper.cpp"
     variable: "WHISPER_CPP_VERSION"
     branch: "master"
+  - repository: "go-skynet/go-bert.cpp"
+    variable: "BERT_VERSION"
+    branch: "master"
   runs-on: ubuntu-latest
   steps:
     - uses: actions/checkout@v3
Makefile: 25 changes

@@ -9,6 +9,7 @@ GOGPT2_VERSION?=245a5bfe6708ab80dc5c733dcdbfbe3cfd2acdaa
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=af62fcc432be2847acb6e0688b2c2491d6588d58
 WHISPER_CPP_VERSION?=bf2449dfae35a46b2cd92ab22661ce81a48d4993
+BERT_VERSION?=ec771ec715576ac050263bb7bb74bfd616a5ba13
 
 
 GREEN := $(shell tput -Txterm setaf 2)

@@ -17,8 +18,8 @@ WHITE := $(shell tput -Txterm setaf 7)
 CYAN := $(shell tput -Txterm setaf 6)
 RESET := $(shell tput -Txterm sgr0)
 
-C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp
-LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp
+C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert
+LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/go-gpt4all-j:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert
 
 # Use this if you want to set the default behavior
 ifndef BUILD_TYPE

@@ -49,6 +50,14 @@ go-gpt4all-j:
 	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/void replace/void json_gptj_replace/g' {} +
 	@find ./go-gpt4all-j -type f -name "*.cpp" -exec sed -i'' -e 's/::replace/::json_gptj_replace/g' {} +
 
+## BERT embeddings
+go-bert:
+	git clone --recurse-submodules https://github.com/go-skynet/go-bert.cpp go-bert
+	cd go-bert && git checkout -b build $(BERT_VERSION) && git submodule update --init --recursive --depth 1
+	@find ./go-bert -type f -name "*.c" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
+	@find ./go-bert -type f -name "*.cpp" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
+	@find ./go-bert -type f -name "*.h" -exec sed -i'' -e 's/ggml_/ggml_bert_/g' {} +
+
 ## RWKV
 go-rwkv:
 	git clone --recurse-submodules $(RWKV_REPO) go-rwkv

@@ -60,6 +69,9 @@ go-rwkv:
 go-rwkv/librwkv.a: go-rwkv
 	cd go-rwkv && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a .. && cp ggml/src/libggml.a ..
 
+go-bert/libgobert.a: go-bert
+	$(MAKE) -C go-bert libgobert.a
+
 go-gpt4all-j/libgptj.a: go-gpt4all-j
 	$(MAKE) -C go-gpt4all-j $(GENERIC_PREFIX)libgptj.a
 

@@ -98,8 +110,9 @@ replace:
 	$(GOCMD) mod edit -replace github.com/go-skynet/go-gpt2.cpp=$(shell pwd)/go-gpt2
 	$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(shell pwd)/go-rwkv
 	$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(shell pwd)/whisper.cpp
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(shell pwd)/go-bert
 
-prepare-sources: go-llama go-gpt2 go-gpt4all-j go-rwkv whisper.cpp
+prepare-sources: go-llama go-gpt2 go-gpt4all-j go-rwkv whisper.cpp go-bert
 	$(GOCMD) mod download
 
 ## GENERIC

@@ -109,15 +122,17 @@ rebuild: ## Rebuilds the project
 	$(MAKE) -C go-gpt2 clean
 	$(MAKE) -C go-rwkv clean
 	$(MAKE) -C whisper.cpp clean
+	$(MAKE) -C go-bert clean
 	$(MAKE) build
 
-prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a replace ## Prepares for building
+prepare: prepare-sources go-llama/libbinding.a go-gpt4all-j/libgptj.a go-bert/libgobert.a go-gpt2/libgpt2.a go-rwkv/librwkv.a whisper.cpp/libwhisper.a replace ## Prepares for building
 
 clean: ## Remove build related file
 	rm -fr ./go-llama
 	rm -rf ./go-gpt4all-j
 	rm -rf ./go-gpt2
 	rm -rf ./go-rwkv
+	rm -rf ./go-bert
 	rm -rf $(BINARY_NAME)
 
 ## Build:

@@ -141,7 +156,7 @@ test-models/testmodel:
 
 test: prepare test-models/testmodel
 	cp tests/fixtures/* test-models
-	@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./...
+	@C_INCLUDE_PATH=${C_INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models $(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo -v -r ./api
 
 ## Help:
 help: ## Show this help.
README.md: 13 changes

@@ -9,7 +9,7 @@
 
 [![](https://dcbadge.vercel.app/api/server/uJAeKSAGDy?style=flat-square&theme=default-inverted)](https://discord.gg/uJAeKSAGDy)
 
-**LocalAI** is a drop-in replacement REST API compatible with OpenAI for local CPU inferencing. It allows to run models locally or on-prem with consumer grade hardware. It is based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all), [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp) and [ggml](https://github.com/ggerganov/ggml), including support GPT4ALL-J which is licensed under Apache 2.0.
+**LocalAI** is a drop-in replacement REST API compatible with OpenAI for local CPU inferencing. It allows you to run models locally or on-prem with consumer-grade hardware, supporting multiple model families. It also supports GPT4ALL-J, which is licensed under Apache 2.0.
 
 - OpenAI compatible API
 - Supports multiple models

@@ -19,10 +19,14 @@
 
 LocalAI is a community-driven project, focused on making the AI accessible to anyone. Any contribution, feedback and PR is welcome! It was initially created by [mudler](https://github.com/mudler/) at the [SpectroCloud OSS Office](https://github.com/spectrocloud).
 
+LocalAI uses C++ bindings for optimizing speed. It is based on [llama.cpp](https://github.com/ggerganov/llama.cpp), [gpt4all](https://github.com/nomic-ai/gpt4all), [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp), [ggml](https://github.com/ggerganov/ggml), [whisper.cpp](https://github.com/ggerganov/whisper.cpp) for audio transcriptions, and [bert.cpp](https://github.com/skeskinen/bert.cpp) for embeddings.
+
 See [examples on how to integrate LocalAI](https://github.com/go-skynet/LocalAI/tree/master/examples/).
 
 ## News
 
+- 10-05-2023: Added support for fast and accurate embeddings with `bert.cpp` ( https://github.com/go-skynet/LocalAI/pull/222 )
+- 09-05-2023: Added experimental support for transcriptions endpoint ( https://github.com/go-skynet/LocalAI/pull/211 )
 - 08-05-2023: Support for embeddings with models using the `llama.cpp` backend ( https://github.com/go-skynet/LocalAI/pull/207 )
 - 02-05-2023: Support for `rwkv.cpp` models ( https://github.com/go-skynet/LocalAI/pull/158 ) and for `/edits` endpoint
 - 01-05-2023: Support for SSE stream of tokens in `llama.cpp` backends ( https://github.com/go-skynet/LocalAI/pull/152 )

@@ -534,18 +538,18 @@ curl http://localhost:8080/v1/models
 
 <details>
 
-The embedding endpoint is experimental and enabled only if the model is configured with `emebddings: true` in its `yaml` file, for example:
+The embedding endpoint is experimental and enabled only if the model is configured with `embeddings: true` in its `yaml` file, for example:
 
 ```yaml
 name: text-embedding-ada-002
 parameters:
-  model: wizardLM-7B.ggml.q5_1.bin
+  model: bert
 embeddings: true
 ```
 
 There is an example available [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/).
 
-Note: embeddings is supported only with `llama.cpp` compatible models. (doesn't work with gpt4-all-j, yet).
+Note: embeddings are supported only with `llama.cpp`-compatible models and with `bert` models. bert is more performant and available independently of the LLM model.
 
 </details>

@@ -667,6 +671,7 @@ MIT
 - [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
 - [go-skynet/go-gpt4all-j.cpp](https://github.com/go-skynet/go-gpt4all-j.cpp)
 - [go-skynet/go-gpt2.cpp](https://github.com/go-skynet/go-gpt2.cpp)
+- [go-skynet/go-bert.cpp](https://github.com/go-skynet/go-bert.cpp)
 - [donomii/go-rwkv.cpp](https://github.com/donomii/go-rwkv.cpp)
 
 ## Acknowledgements
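The embedding endpoint configured above is OpenAI-compatible, so any OpenAI client can exercise it. Below is a minimal Go sketch, assuming LocalAI is listening on `localhost:8080` with the `text-embedding-ada-002` model configured as in the README example; the request and response shapes follow the OpenAI embeddings API that LocalAI mirrors (illustration only, not part of this commit):

```go
// Call LocalAI's OpenAI-compatible /v1/embeddings endpoint and print
// the dimensionality of the returned vector.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

type embeddingRequest struct {
	Model string `json:"model"`
	Input string `json:"input"`
}

type embeddingResponse struct {
	Data []struct {
		Embedding []float32 `json:"embedding"`
	} `json:"data"`
}

func main() {
	body, err := json.Marshal(embeddingRequest{
		Model: "text-embedding-ada-002", // the name from the yaml config above
		Input: "LocalAI now supports bert.cpp embeddings",
	})
	if err != nil {
		log.Fatal(err)
	}

	resp, err := http.Post("http://localhost:8080/v1/embeddings", "application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	var out embeddingResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		log.Fatal(err)
	}
	if len(out.Data) == 0 {
		log.Fatal("no embedding returned")
	}
	fmt.Println("embedding dimensions:", len(out.Data[0].Embedding))
}
```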
@@ -79,7 +79,7 @@ var _ = Describe("API test", func() {
 	It("returns errors", func() {
 		_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
 		Expect(err).To(HaveOccurred())
-		Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 5 errors occurred:"))
+		Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 6 errors occurred:"))
 	})
 
 })
@@ -8,6 +8,7 @@ import (
 
 	"github.com/donomii/go-rwkv.cpp"
 	model "github.com/go-skynet/LocalAI/pkg/model"
+	bert "github.com/go-skynet/go-bert.cpp"
 	gpt2 "github.com/go-skynet/go-gpt2.cpp"
 	gptj "github.com/go-skynet/go-gpt4all-j.cpp"
 	llama "github.com/go-skynet/go-llama.cpp"

@@ -62,6 +63,14 @@ func ModelEmbedding(s string, tokens []int, loader *model.ModelLoader, c Config)
 			}
 			return model.Embeddings(s, predictOptions...)
 		}
+	// bert embeddings
+	case *bert.Bert:
+		fn = func() ([]float32, error) {
+			if len(tokens) > 0 {
+				return nil, fmt.Errorf("embeddings endpoint for this model supports only string")
+			}
+			return model.Embeddings(s, bert.SetThreads(c.Threads))
+		}
 	default:
 		fn = func() ([]float32, error) {
 			return nil, fmt.Errorf("embeddings not supported by the backend")
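The new `case *bert.Bert` branch above relies on the go-bert.cpp binding's `New`, `Embeddings`, and `SetThreads` calls, which all appear verbatim in this diff. A minimal standalone sketch of that binding; the model path is illustrative (e.g. the all-MiniLM-L6-v2 file fetched in the query_data example further down):

```go
// Load a ggml-format BERT model with go-bert.cpp and compute one
// sentence embedding, mirroring the calls used in the diff above.
package main

import (
	"fmt"
	"log"

	bert "github.com/go-skynet/go-bert.cpp"
)

func main() {
	// Load the model from disk (path is an example).
	b, err := bert.New("./models/bert")
	if err != nil {
		log.Fatal(err)
	}

	// Compute a sentence embedding on 4 threads.
	emb, err := b.Embeddings("the quick brown fox", bert.SetThreads(4))
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("embedding dimensions:", len(emb))
}
```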
@@ -12,11 +12,7 @@ Summary of the steps:
 
 ## Requirements
 
-For this in order to work, you will need LocalAI and a model compatible with the `llama.cpp` backend. This will not work with gpt4all, however you can mix models (use a llama.cpp one to build the index database, and gpt4all to query it).
-
-The example uses `WizardLM` for both embeddings and Q&A. Edit the config files in `models/` accordingly to specify the model you use (change `HERE` in the configuration files).
-
-You will also need a training data set. Copy that over `data`.
+You will need a training data set. Copy that over `data`.
 
 ## Setup
 

@@ -28,7 +24,8 @@ git clone https://github.com/go-skynet/LocalAI
 
 cd LocalAI/examples/query_data
 
-# Copy your models, edit config files accordingly
+wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
+wget https://gpt4all.io/models/ggml-gpt4all-j.bin -O models/ggml-gpt4all-j
 
 # start with docker-compose
 docker-compose up -d --build
@@ -1,18 +1,6 @@
 name: text-embedding-ada-002
 parameters:
-  model: HERE
-  top_k: 80
-  temperature: 0.2
-  top_p: 0.7
-context_size: 1024
+  model: bert
 threads: 14
-stopwords:
-- "HUMAN:"
-- "GPT:"
-roles:
-  user: " "
-  system: " "
+backend: bert-embeddings
 embeddings: true
-template:
-  completion: completion
-  chat: gpt4all
@@ -1,12 +1,11 @@
 name: gpt-3.5-turbo
 parameters:
-  model: HERE
+  model: ggml-gpt4all-j
   top_k: 80
   temperature: 0.2
   top_p: 0.7
 context_size: 1024
 threads: 14
-embeddings: true
 stopwords:
 - "HUMAN:"
 - "GPT:"

@@ -15,4 +14,4 @@ roles:
   system: " "
 template:
   completion: completion
-  chat: wizardlm
+  chat: gpt4all
@@ -1,3 +0,0 @@
-{{.Input}}
-
-### Response:
@@ -13,7 +13,7 @@ base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
 llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
 
 # Configure prompt parameters and initialise helper
-max_input_size = 1024
+max_input_size = 500
 num_output = 256
 max_chunk_overlap = 20
 
@@ -13,15 +13,15 @@ base_path = os.environ.get('OPENAI_API_BASE', 'http://localhost:8080/v1')
 llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_base=base_path))
 
 # Configure prompt parameters and initialise helper
-max_input_size = 512
-num_output = 512
+max_input_size = 400
+num_output = 400
 max_chunk_overlap = 30
 
 prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
 
 # Load documents from the 'data' directory
 documents = SimpleDirectoryReader('data').load_data()
-service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 512)
+service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, chunk_size_limit = 400)
 index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
 index.storage_context.persist(persist_dir="./storage")
 
go.mod: 1 addition

@@ -6,6 +6,7 @@ require (
 	github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be
 	github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35
 	github.com/go-audio/wav v1.1.0
+	github.com/go-skynet/go-bert.cpp v0.0.0-20230510101404-7bb183b147ea
 	github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708
 	github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c
 	github.com/go-skynet/go-llama.cpp v0.0.0-20230509080828-f4d26f43f1d3
go.sum: 13 changes

@@ -16,8 +16,6 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be h1:3Hic97PY6hcw/SY44RuR7kyONkxd744RFeRrqckzwNQ=
-github.com/donomii/go-rwkv.cpp v0.0.0-20230503112711-af62fcc432be/go.mod h1:gWy7FIWioqYmYxkaoFyBnaKApeZVrUkHhv9EV9pz4dM=
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35 h1:sMg/SgnMPS/HNUO/2kGm72vl8R9TmNIwgLFr2TNwR3g=
 github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20230508180809-bf2449dfae35/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
 github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=

@@ -38,14 +36,6 @@ github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7
 github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
 github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM=
 github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
-github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708 h1:cfOi4TWvQ6JsAm9Q1A8I8j9YfNy10bmIfwOiyGyU5wQ=
-github.com/go-skynet/go-gpt2.cpp v0.0.0-20230422085954-245a5bfe6708/go.mod h1:1Wj/xbkMfwQSOrhNYK178IzqQHstZbRfhx4s8p1M5VM=
-github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c h1:48I7jpLNGiQeBmF0SFVVbREh8vlG0zN13v9LH5ctXis=
-github.com/go-skynet/go-gpt4all-j.cpp v0.0.0-20230422090028-1f7bff57f66c/go.mod h1:5VZ9XbcINI0XcHhkcX8GPK8TplFGAzu1Hrg4tNiMCtI=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230508165257-c03e8adbc45c h1:JoW2+LKrSemoV32QRwrEC5f53erym96NCsUSM3wSVbM=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230508165257-c03e8adbc45c/go.mod h1:DLfsPD7tYYnpksERH83HSf7qVNW3FIwmz7/zfYO0/6I=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230509080828-f4d26f43f1d3 h1:YNi1oetK5kGJoUgT3/r/Wj3XPOICWf3nwHsz5v89iSs=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230509080828-f4d26f43f1d3/go.mod h1:DLfsPD7tYYnpksERH83HSf7qVNW3FIwmz7/zfYO0/6I=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
 github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=

@@ -197,8 +187,9 @@ golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8T
 google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
 gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b h1:QRR6H1YWRnHb4Y/HeNFCTJLFVxaq6wH4YuVdsUOr75U=
+gopkg.in/check.v1 v1.0.0-20200902074654-038fdea0a05b/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
 gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
@@ -14,6 +14,7 @@ import (
 	"github.com/rs/zerolog/log"
 
 	rwkv "github.com/donomii/go-rwkv.cpp"
+	bert "github.com/go-skynet/go-bert.cpp"
 	gpt2 "github.com/go-skynet/go-gpt2.cpp"
 	gptj "github.com/go-skynet/go-gpt4all-j.cpp"
 	llama "github.com/go-skynet/go-llama.cpp"

@@ -22,13 +23,15 @@ import (
 type ModelLoader struct {
 	ModelPath string
 	mu        sync.Mutex
+	// TODO: this needs generics
 	models            map[string]*llama.LLama
 	gptmodels         map[string]*gptj.GPTJ
 	gpt2models        map[string]*gpt2.GPT2
 	gptstablelmmodels map[string]*gpt2.StableLM
 	rwkv              map[string]*rwkv.RwkvState
-	promptsTemplates  map[string]*template.Template
+	bert              map[string]*bert.Bert
+
+	promptsTemplates map[string]*template.Template
 }
 
 func NewModelLoader(modelPath string) *ModelLoader {

@@ -39,6 +42,7 @@ func NewModelLoader(modelPath string) *ModelLoader {
 		gptstablelmmodels: make(map[string]*gpt2.StableLM),
 		models:            make(map[string]*llama.LLama),
 		rwkv:              make(map[string]*rwkv.RwkvState),
+		bert:              make(map[string]*bert.Bert),
 		promptsTemplates:  make(map[string]*template.Template),
 	}
 }
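The `// TODO: this needs generics` comment above points at the obvious refactor: every per-backend map (`models`, `gptmodels`, `bert`, ...) follows the same lock, check, load, memoize pattern. A sketch of what a generic cache could look like; this is an illustration of the TODO, not code from this commit:

```go
// Generic memoizing model cache, mirroring ModelLoader's locking pattern.
package main

import (
	"fmt"
	"sync"
)

// cache holds loaded models of any backend type T behind a mutex.
type cache[T any] struct {
	mu     sync.Mutex
	models map[string]T
}

// getOrLoad returns the cached model for name, loading it once on demand.
func (c *cache[T]) getOrLoad(name string, load func(string) (T, error)) (T, error) {
	c.mu.Lock()
	defer c.mu.Unlock()
	if m, ok := c.models[name]; ok {
		return m, nil // already loaded in memory, reuse it
	}
	m, err := load(name)
	if err != nil {
		var zero T
		return zero, err
	}
	if c.models == nil {
		c.models = map[string]T{}
	}
	c.models[name] = m
	return m, nil
}

func main() {
	var berts cache[string] // stand-in for cache[*bert.Bert]
	m, _ := berts.getOrLoad("bert", func(n string) (string, error) {
		return "loaded:" + n, nil
	})
	fmt.Println(m)
}
```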
@@ -156,6 +160,38 @@ func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, erro
 	return model, err
 }
 
+func (ml *ModelLoader) LoadBERT(modelName string) (*bert.Bert, error) {
+	ml.mu.Lock()
+	defer ml.mu.Unlock()
+
+	// Check if we already have a loaded model
+	if !ml.ExistsInModelPath(modelName) {
+		return nil, fmt.Errorf("model does not exist")
+	}
+
+	if m, ok := ml.bert[modelName]; ok {
+		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
+		return m, nil
+	}
+
+	// Load the model and keep it in memory for later use
+	modelFile := filepath.Join(ml.ModelPath, modelName)
+	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
+
+	model, err := bert.New(modelFile)
+	if err != nil {
+		return nil, err
+	}
+
+	// If there is a prompt template, load it
+	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
+		return nil, err
+	}
+
+	ml.bert[modelName] = model
+	return model, err
+}
+
 func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
 	ml.mu.Lock()
 	defer ml.mu.Unlock()

@@ -299,6 +335,8 @@ func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, lla
 		return ml.LoadGPT2Model(modelFile)
 	case "gptj":
 		return ml.LoadGPTJModel(modelFile)
+	case "bert-embeddings":
+		return ml.LoadBERT(modelFile)
 	case "rwkv":
 		return ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
 	default:

@@ -361,5 +399,13 @@ func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOpt
 		err = multierror.Append(err, modelerr)
 	}
 
+	model, modelerr = ml.LoadBERT(modelFile)
+	if modelerr == nil {
+		updateModels(model)
+		return model, nil
+	} else {
+		err = multierror.Append(err, modelerr)
+	}
+
 	return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
 }
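GreedyLoader's new BERT branch follows the same fallback shape as the other backends: try a loader, return on success, otherwise accumulate the error with go-multierror, which is why the API test above now expects "6 errors occurred" (one per backend). A simplified sketch of that pattern with hypothetical loader functions, not LocalAI's actual API:

```go
// Greedy backend fallback: try each loader in order, collect failures
// with go-multierror, return the first model that loads.
package main

import (
	"fmt"

	"github.com/hashicorp/go-multierror"
)

func greedyLoad(modelFile string, loaders []func(string) (interface{}, error)) (interface{}, error) {
	var err error
	for _, load := range loaders {
		model, modelerr := load(modelFile)
		if modelerr == nil {
			return model, nil
		}
		err = multierror.Append(err, modelerr)
	}
	return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
}

func main() {
	// Two hypothetical failing backends to show error accumulation.
	_, err := greedyLoad("foomodel", []func(string) (interface{}, error){
		func(string) (interface{}, error) { return nil, fmt.Errorf("llama: invalid model file") },
		func(string) (interface{}, error) { return nil, fmt.Errorf("bert: invalid model file") },
	})
	fmt.Println(err) // prints "... 2 errors occurred: ..."
}
```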