feat(llama.cpp): support distributed llama.cpp

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Ettore Di Giacinto 2024-05-14 19:42:05 +02:00
parent a670318a9f
commit 57ef494862
3 changed files with 20 additions and 2 deletions

.env

@@ -71,6 +71,11 @@
### Define the number of parallel LLAMA.cpp workers (Defaults to 1)
# LLAMACPP_PARALLEL=1
### Define a list of GRPC Servers for llama-cpp workers to distribute the load
# https://github.com/ggerganov/llama.cpp/pull/6829
# https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md
# LLAMACPP_GRPC_SERVERS=""
### Enable to run parallel requests
# LOCALAI_PARALLEL_REQUESTS=true
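
A minimal usage sketch for the new variable (the worker addresses below are placeholders, not part of this commit; the comma-separated host:port format follows the llama.cpp RPC example linked above):
# Point the llama.cpp backend at two hypothetical RPC workers
LLAMACPP_GRPC_SERVERS="192.168.1.10:50052,192.168.1.11:50052"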

Makefile

@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
# llama.cpp versions
GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=dc685be46622a8fabfd57cfa804237c8f15679b8
+CPPLLAMA_VERSION?=4f0263633b40e94e8b69fd6e7e4395cfedfd5c12
# gpt4all version
GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -314,7 +314,7 @@ build: prepare backend-assets grpcs ## Build the project
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
build-minimal:
-BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp" GO_TAGS=none $(MAKE) build
+BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp-avx2" GO_TAGS=none $(MAKE) build
build-api:
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
@@ -764,3 +764,10 @@ docker-image-intel-xpu:
.PHONY: swagger
swagger:
swag init -g core/http/app.go --output swagger
backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-grpc
$(MAKE) -C backend/cpp/llama-grpc purge
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_RPC=ON -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc
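
With this target in place, the RPC-enabled backend can be built on its own; a rough invocation sketch (the target name comes straight from the recipe above, nothing else is assumed):
# Build the RPC variant of the llama.cpp gRPC server into backend-assets/grpc/llama-cpp-grpc
make backend-assets/grpc/llama-cpp-grpc
Note that the recipe builds this variant with the CPU extensions (AVX/AVX2/AVX512/FMA/F16C) turned off.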

backend/cpp/llama/grpc-server.cpp

@@ -2217,6 +2217,12 @@ static void params_parse(const backend::ModelOptions* request,
} else {
params.n_parallel = 1;
}
const char *llama_grpc_servers = std::getenv("LLAMACPP_GRPC_SERVERS");
if (llama_grpc_servers != NULL) {
params.rpc_servers = std::string(llama_grpc_servers);
}
// TODO: Add yarn
if (!request->tensorsplit().empty()) {
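
End to end, this code path only activates when LLAMACPP_GRPC_SERVERS is present in the environment of the LocalAI process, in which case its value is copied verbatim into params.rpc_servers. A rough runtime sketch, assuming the rpc-server binary and flags documented in the llama.cpp RPC README linked in the .env change above (addresses are placeholders):
# On each worker machine, start a llama.cpp RPC server, e.g.:
#   rpc-server -p 50052
# Then launch LocalAI with the worker list so params_parse picks it up:
LLAMACPP_GRPC_SERVERS="192.168.1.10:50052,192.168.1.11:50052" ./local-ai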