diff --git a/Makefile b/Makefile
index b412e6e4..35e4c344 100644
--- a/Makefile
+++ b/Makefile
@@ -152,9 +152,11 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
 	OPTIONAL_GRPC+=backend-assets/grpc/piper
 endif
 
-ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface
+ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
 ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-noavx
+ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
 ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
 ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
 ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
@@ -293,6 +295,7 @@ clean: ## Remove build related file
 	rm -rf backend-assets/*
 	$(MAKE) -C backend/cpp/grpc clean
 	$(MAKE) -C backend/cpp/llama clean
+	rm -rf backend/cpp/llama-* || true
 	$(MAKE) dropreplace
 	$(MAKE) protogen-clean
 	rmdir pkg/grpc/proto || true
@@ -311,7 +314,7 @@ build: prepare backend-assets grpcs ## Build the project
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
 
 build-minimal:
-	BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS=backend-assets/grpc/llama-cpp GO_TAGS=none $(MAKE) build
+	BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp" GO_TAGS=none $(MAKE) build
 
 build-api:
 	BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
@@ -616,8 +619,8 @@ backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/go
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/
 
-backend-assets/grpc/langchain-huggingface: backend-assets/grpc
-	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./backend/go/llm/langchain/
+backend-assets/grpc/huggingface: backend-assets/grpc
+	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
 
 backend/cpp/llama/llama.cpp:
 	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
@@ -629,7 +632,7 @@ ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
 	-Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
 	-DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
 	-DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
-backend/cpp/llama/grpc-server:
+build-llama-cpp-grpc-server:
 # Conditionally build grpc for the llama backend to use if needed
 ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
 	$(MAKE) -C backend/cpp/grpc build
@@ -638,19 +641,37 @@ ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
 	PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \
 	CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \
 	LLAMA_VERSION=$(CPPLLAMA_VERSION) \
-	$(MAKE) -C backend/cpp/llama grpc-server
+	$(MAKE) -C backend/cpp/${VARIANT} grpc-server
 else
 	echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
-	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
+	LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
 endif
 
-backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
-	cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
+backend-assets/grpc/llama-cpp: backend-assets/grpc
+	$(info ${GREEN}I llama-cpp build info:standard${RESET})
+	cp -rf backend/cpp/llama backend/cpp/llama-default
+	$(MAKE) -C backend/cpp/llama-default purge
+	$(MAKE) VARIANT="llama-default" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-default/grpc-server backend-assets/grpc/llama-cpp
 # TODO: every binary should have its own folder instead, so can have different metal implementations
 ifeq ($(BUILD_TYPE),metal)
-	cp backend/cpp/llama/llama.cpp/build/bin/default.metallib backend-assets/grpc/
+	cp backend/cpp/llama-default/llama.cpp/build/bin/default.metallib backend-assets/grpc/
 endif
 
+backend-assets/grpc/llama-cpp-noavx: backend-assets/grpc
+	cp -rf backend/cpp/llama backend/cpp/llama-noavx
+	$(MAKE) -C backend/cpp/llama-noavx purge
+	$(info ${GREEN}I llama-cpp build info:noavx${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF" $(MAKE) VARIANT="llama-noavx" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-noavx/grpc-server backend-assets/grpc/llama-cpp-noavx
+
+backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
+	cp -rf backend/cpp/llama backend/cpp/llama-fallback
+	$(MAKE) -C backend/cpp/llama-fallback purge
+	$(info ${GREEN}I llama-cpp build info:fallback${RESET})
+	CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
+	cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
+
 backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
 	$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
diff --git a/backend/cpp/llama/Makefile b/backend/cpp/llama/Makefile
index 3d31284a..ed610861 100644
--- a/backend/cpp/llama/Makefile
+++ b/backend/cpp/llama/Makefile
@@ -43,31 +43,23 @@ llama.cpp:
 
 llama.cpp/examples/grpc-server: llama.cpp
 	mkdir -p llama.cpp/examples/grpc-server
-	cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
-	cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
-	cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
-	cp -rfv $(abspath ./)/utils.hpp llama.cpp/examples/grpc-server/
-	echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
-## XXX: In some versions of CMake clip wasn't being built before llama.
-## This is an hack for now, but it should be fixed in the future.
-	cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
-	cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
-	echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
-	cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
-	cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
+	bash prepare.sh
 
 rebuild:
-	cp -rfv $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
-	cp -rfv $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
-	cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
+	bash prepare.sh
 	rm -rf grpc-server
 	$(MAKE) grpc-server
 
-clean:
-	rm -rf llama.cpp
+purge:
+	rm -rf llama.cpp/build
+	rm -rf llama.cpp/examples/grpc-server
 	rm -rf grpc-server
 
+clean: purge
+	rm -rf llama.cpp
+
 grpc-server: llama.cpp llama.cpp/examples/grpc-server
+	@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
 ifneq (,$(findstring sycl,$(BUILD_TYPE)))
 	bash -c "source $(ONEAPI_VARS); \
 		cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release"
diff --git a/backend/cpp/llama/prepare.sh b/backend/cpp/llama/prepare.sh
new file mode 100644
index 00000000..6c00f27c
--- /dev/null
+++ b/backend/cpp/llama/prepare.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
+cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
+cp -rfv json.hpp llama.cpp/examples/grpc-server/
+cp -rfv utils.hpp llama.cpp/examples/grpc-server/
+
+if grep -q "grpc-server" llama.cpp/examples/CMakeLists.txt; then
+	echo "grpc-server already added"
+else
+	echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
+fi
+
+## XXX: In some versions of CMake clip wasn't being built before llama.
+## This is a hack for now, but it should be fixed in the future.
+cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
+cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
+echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
+cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
+cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
\ No newline at end of file
diff --git a/backend/go/llm/langchain/langchain.go b/backend/go/llm/langchain/langchain.go
index f1dee360..472d24d3 100644
--- a/backend/go/llm/langchain/langchain.go
+++ b/backend/go/llm/langchain/langchain.go
@@ -4,6 +4,7 @@ package main
 // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
 import (
 	"fmt"
+	"os"
 
 	"github.com/go-skynet/LocalAI/pkg/grpc/base"
 	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
@@ -18,9 +19,14 @@ type LLM struct {
 }
 
 func (llm *LLM) Load(opts *pb.ModelOptions) error {
-	llm.langchain, _ = langchain.NewHuggingFace(opts.Model)
+	var err error
+	hfToken := os.Getenv("HUGGINGFACEHUB_API_TOKEN")
+	if hfToken == "" {
+		return fmt.Errorf("no huggingface token provided")
+	}
+	llm.langchain, err = langchain.NewHuggingFace(opts.Model, hfToken)
 	llm.model = opts.Model
-	return nil
+	return err
 }
 
 func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
diff --git a/core/http/app_test.go b/core/http/app_test.go
index f4728770..e9ff3e28 100644
--- a/core/http/app_test.go
+++ b/core/http/app_test.go
@@ -787,11 +787,11 @@ var _ = Describe("API test", func() {
 			})
 
 			It("returns errors", func() {
-				backends := len(model.AutoLoadBackends) + 1 // +1 for huggingface
 				_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: testPrompt})
 				Expect(err).To(HaveOccurred())
-				Expect(err.Error()).To(ContainSubstring(fmt.Sprintf("error, status code: 500, message: could not load model - all backends returned error: %d errors occurred:", backends)))
+				Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error:"))
 			})
+
 			It("transcribes audio", func() {
 				if runtime.GOOS != "linux" {
 					Skip("test supported only on linux")
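Note (not part of the patch): the looser assertion above goes hand in hand with the switch from hashicorp/go-multierror to the standard library's errors.Join in pkg/model/initializers.go further down. The removed line matched go-multierror's "N errors occurred:" header, while errors.Join just concatenates the individual error messages with newlines, so only the stable prefix can still be asserted. A minimal sketch of the new aggregation behaviour, mirroring how GreedyLoader accumulates per-backend failures (the error strings below are illustrative only):

package main

import (
	"errors"
	"fmt"
)

func main() {
	var err error // starts out nil, as in GreedyLoader
	err = errors.Join(err, fmt.Errorf("[llama-cpp] could not load model"))
	err = errors.Join(err, fmt.Errorf("backend returned no usable model"))
	fmt.Println(err)
	// Output:
	// [llama-cpp] could not load model
	// backend returned no usable model
}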
diff --git a/pkg/langchain/huggingface.go b/pkg/langchain/huggingface.go
index 38c55cd5..9be5ee9d 100644
--- a/pkg/langchain/huggingface.go
+++ b/pkg/langchain/huggingface.go
@@ -2,6 +2,7 @@ package langchain
 
 import (
 	"context"
+	"fmt"
 
 	"github.com/tmc/langchaingo/llms"
 	"github.com/tmc/langchaingo/llms/huggingface"
@@ -9,11 +10,16 @@ import (
 
 type HuggingFace struct {
 	modelPath string
+	token     string
 }
 
-func NewHuggingFace(repoId string) (*HuggingFace, error) {
+func NewHuggingFace(repoId, token string) (*HuggingFace, error) {
+	if token == "" {
+		return nil, fmt.Errorf("no huggingface token provided")
+	}
 	return &HuggingFace{
 		modelPath: repoId,
+		token:     token,
 	}, nil
 }
 
@@ -21,7 +27,7 @@ func (s *HuggingFace) PredictHuggingFace(text string, opts ...PredictOption) (*P
 	po := NewPredictOptions(opts...)
 
 	// Init client
-	llm, err := huggingface.New()
+	llm, err := huggingface.New(huggingface.WithToken(s.token))
 	if err != nil {
 		return nil, err
 	}
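A minimal sketch (not part of the patch) of the credential flow this change introduces: the renamed huggingface gRPC backend reads HUGGINGFACEHUB_API_TOKEN at Load time, NewHuggingFace rejects an empty token, and the token reaches langchaingo through huggingface.WithToken as shown in the hunk above. Everything past client construction is left out here, since that part is handled by PredictHuggingFace:

package main

import (
	"fmt"
	"os"

	"github.com/tmc/langchaingo/llms/huggingface"
)

func main() {
	token := os.Getenv("HUGGINGFACEHUB_API_TOKEN")
	if token == "" {
		// the backend now fails Load() early with this message instead of silently ignoring the error
		fmt.Println("no huggingface token provided")
		return
	}

	// same client construction that PredictHuggingFace performs
	llm, err := huggingface.New(huggingface.WithToken(token))
	if err != nil {
		fmt.Println("client error:", err)
		return
	}
	fmt.Printf("huggingface client ready: %T\n", llm)
}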
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index 5a65d01f..88051d5c 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -2,27 +2,32 @@ package model
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"os"
 	"path/filepath"
+	"slices"
 	"strings"
 	"time"
 
 	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
-	"github.com/hashicorp/go-multierror"
 	"github.com/phayes/freeport"
 	"github.com/rs/zerolog/log"
 )
 
 var Aliases map[string]string = map[string]string{
-	"go-llama":       LLamaCPP,
-	"llama":          LLamaCPP,
-	"embedded-store": LocalStoreBackend,
+	"go-llama":              LLamaCPP,
+	"llama":                 LLamaCPP,
+	"embedded-store":        LocalStoreBackend,
+	"langchain-huggingface": LCHuggingFaceBackend,
 }
 
 const (
-	LlamaGGML = "llama-ggml"
-	LLamaCPP  = "llama-cpp"
+	LlamaGGML = "llama-ggml"
+	LLamaCPP  = "llama-cpp"
+
+	LLamaCPPFallback = "llama-cpp-fallback"
+
 	Gpt4AllLlamaBackend = "gpt4all-llama"
 	Gpt4AllMptBackend   = "gpt4all-mpt"
 	Gpt4AllJBackend     = "gpt4all-j"
@@ -34,21 +39,73 @@ const (
 	StableDiffusionBackend = "stablediffusion"
 	TinyDreamBackend       = "tinydream"
 	PiperBackend           = "piper"
-	LCHuggingFaceBackend   = "langchain-huggingface"
+	LCHuggingFaceBackend   = "huggingface"
 	LocalStoreBackend      = "local-store"
 )
 
-var AutoLoadBackends []string = []string{
-	LLamaCPP,
-	LlamaGGML,
-	Gpt4All,
-	BertEmbeddingsBackend,
-	RwkvBackend,
-	WhisperBackend,
-	StableDiffusionBackend,
-	TinyDreamBackend,
-	PiperBackend,
+func backendPath(assetDir, backend string) string {
+	return filepath.Join(assetDir, "backend-assets", "grpc", backend)
+}
+
+func backendsInAssetDir(assetDir string) ([]string, error) {
+	excludeBackends := []string{"local-store"}
+	entry, err := os.ReadDir(backendPath(assetDir, ""))
+	if err != nil {
+		return nil, err
+	}
+	var backends []string
+ENTRY:
+	for _, e := range entry {
+		for _, exclude := range excludeBackends {
+			if e.Name() == exclude {
+				continue ENTRY
+			}
+		}
+		if !e.IsDir() {
+			backends = append(backends, e.Name())
+		}
+	}
+
+	// order backends from the asset directory.
+	// as we scan for backends, we want to keep a specific order in which backends are tried.
+	// for example, llama.cpp should be tried first, and the huggingface backend should be kept last.
+
+	// sets a priority list
+	// First has more priority
+	priorityList := []string{
+		// First llama.cpp and llama-ggml
+		LLamaCPP, LLamaCPPFallback, LlamaGGML, Gpt4All,
+	}
+	toTheEnd := []string{
+		// last has to be huggingface
+		LCHuggingFaceBackend,
+		// then bert embeddings
+		BertEmbeddingsBackend,
+	}
+	slices.Reverse(priorityList)
+	slices.Reverse(toTheEnd)
+
+	// order certain backends first
+	for _, b := range priorityList {
+		for i, be := range backends {
+			if be == b {
+				backends = append([]string{be}, append(backends[:i], backends[i+1:]...)...)
+				break
+			}
+		}
+	}
+	// make sure that some others are pushed to the end
+	for _, b := range toTheEnd {
+		for i, be := range backends {
+			if be == b {
+				backends = append(append(backends[:i], backends[i+1:]...), be)
+				break
+			}
+		}
+	}
+
+	return backends, nil
 }
 
 // starts the grpcModelProcess for the backend, and returns a grpc client
@@ -99,7 +156,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
 			client = ModelAddress(uri)
 		}
 	} else {
-		grpcProcess := filepath.Join(o.assetDir, "backend-assets", "grpc", backend)
+		grpcProcess := backendPath(o.assetDir, backend)
 		// Check if the file exists
 		if _, err := os.Stat(grpcProcess); os.IsNotExist(err) {
 			return "", fmt.Errorf("grpc process not found: %s. some backends(stablediffusion, tts) require LocalAI compiled with GO_TAGS", grpcProcess)
@@ -243,7 +300,12 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
 
 	// autoload also external backends
 	allBackendsToAutoLoad := []string{}
-	allBackendsToAutoLoad = append(allBackendsToAutoLoad, AutoLoadBackends...)
+	autoLoadBackends, err := backendsInAssetDir(o.assetDir)
+	if err != nil {
+		return nil, err
+	}
+	log.Debug().Msgf("Loading from the following backends (in order): %+v", autoLoadBackends)
+	allBackendsToAutoLoad = append(allBackendsToAutoLoad, autoLoadBackends...)
 	for _, b := range o.externalBackends {
 		allBackendsToAutoLoad = append(allBackendsToAutoLoad, b)
 	}
@@ -271,10 +333,10 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
 			log.Info().Msgf("[%s] Loads OK", b)
 			return model, nil
 		} else if modelerr != nil {
-			err = multierror.Append(err, modelerr)
+			err = errors.Join(err, modelerr)
 			log.Info().Msgf("[%s] Fails: %s", b, modelerr.Error())
 		} else if model == nil {
-			err = multierror.Append(err, fmt.Errorf("backend returned no usable model"))
+			err = errors.Join(err, fmt.Errorf("backend returned no usable model"))
 			log.Info().Msgf("[%s] Fails: %s", b, "backend returned no usable model")
 		}
 	}
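For clarity (not part of the patch): backendsInAssetDir now derives the autoload list from whatever binaries exist under backend-assets/grpc and then reorders it so the llama.cpp family is tried first and huggingface last. A standalone sketch of that ordering rule, using a hypothetical directory listing (os.ReadDir returns entries sorted by name):

package main

import (
	"fmt"
	"slices"
)

func main() {
	// hypothetical contents of backend-assets/grpc, already sorted by name
	backends := []string{
		"bert-embeddings", "gpt4all", "huggingface",
		"llama-cpp", "llama-cpp-fallback", "llama-ggml", "rwkv",
	}

	// same lists as backendsInAssetDir, reversed so the highest-priority entry is moved last
	priorityList := []string{"llama-cpp", "llama-cpp-fallback", "llama-ggml", "gpt4all"}
	toTheEnd := []string{"huggingface", "bert-embeddings"}
	slices.Reverse(priorityList)
	slices.Reverse(toTheEnd)

	// pull priority backends to the front; the last one moved ends up first
	for _, b := range priorityList {
		if i := slices.Index(backends, b); i >= 0 {
			backends = append([]string{b}, append(backends[:i], backends[i+1:]...)...)
		}
	}
	// push the late backends to the end; huggingface is moved last, so it stays last
	for _, b := range toTheEnd {
		if i := slices.Index(backends, b); i >= 0 {
			backends = append(append(backends[:i], backends[i+1:]...), b)
		}
	}

	fmt.Println(backends)
	// [llama-cpp llama-cpp-fallback llama-ggml gpt4all rwkv bert-embeddings huggingface]
}

With these inputs the resulting order is the one GreedyLoader now logs via "Loading from the following backends (in order)": the llama.cpp variants, then llama-ggml and gpt4all, then the remaining backends, with bert-embeddings and huggingface at the end.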