mirror of https://github.com/mudler/LocalAI.git
parent cacdf676a2
commit 793c45d7c4
@@ -12,9 +12,9 @@ import (
	grpc "github.com/go-skynet/LocalAI/pkg/grpc"
	"github.com/go-skynet/LocalAI/pkg/xsysinfo"
	"github.com/klauspost/cpuid/v2"
	"github.com/phayes/freeport"
	"github.com/rs/zerolog/log"
	"golang.org/x/sys/cpu"

	"github.com/elliotchance/orderedmap/v2"
)
@@ -26,12 +26,13 @@ var Aliases map[string]string = map[string]string{
	"langchain-huggingface": LCHuggingFaceBackend,
}

var autoDetect = os.Getenv("DISABLE_AUTODETECT") != "true"

const (
	LlamaGGML = "llama-ggml"

	LLamaCPP = "llama-cpp"

	LLamaCPPCUDA12 = "llama-cpp-cuda12"
	LLamaCPPAVX2 = "llama-cpp-avx2"
	LLamaCPPAVX = "llama-cpp-avx"
	LLamaCPPFallback = "llama-cpp-fallback"
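For orientation, the constants above name the llama.cpp variant binaries that ship in the backend asset directory, and the new autoDetect flag (driven by the DISABLE_AUTODETECT environment variable) gates the scanning logic further down. Below is a minimal, self-contained sketch of how a variant constant could resolve to a binary path; backendPath itself is real and is called later in this diff, but the "backend-assets/grpc" layout and the helper name used here are illustrative assumptions, not code from this commit.

package main

import (
	"fmt"
	"path/filepath"
)

// mirrors LLamaCPPAVX2 above; the other variant names follow the same pattern
const llamaCPPAVX2 = "llama-cpp-avx2"

// backendPathSketch joins the asset dir with the (assumed) grpc backend folder
// and the variant name. The real backendPath lives elsewhere in pkg/model.
func backendPathSketch(assetDir, backend string) string {
	return filepath.Join(assetDir, "backend-assets", "grpc", backend)
}

func main() {
	// e.g. /usr/share/local-ai/backend-assets/grpc/llama-cpp-avx2
	fmt.Println(backendPathSketch("/usr/share/local-ai", llamaCPPAVX2))
}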
@@ -90,8 +91,9 @@ ENTRY:
	// if we are autoDetecting, we want to show the llama.cpp variants as a single backend
	if autoDetect {
		// if we find the llama.cpp variants, show them off as a single backend (llama-cpp)
		foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC := false, false, false, false
		// if we find the llama.cpp variants, show them off as a single backend (llama-cpp), as later we are going to pick that up
		// when starting the service
		foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC, foundLCPPCuda := false, false, false, false, false
		if _, ok := backends[LLamaCPP]; !ok {
			for _, e := range entry {
				if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 {
@@ -110,6 +112,10 @@ ENTRY:
					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPGRPC)
					foundLCPPGRPC = true
				}
				if strings.Contains(e.Name(), LLamaCPPCUDA) && !foundLCPPCuda {
					backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPCUDA)
					foundLCPPCuda = true
				}
			}
		}
	}
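Taken together, the two hunks above make autodetection fold every llama.cpp build it finds (AVX, AVX2, fallback, grpc and now cuda) under the single llama-cpp backend name. A simplified standalone sketch of that grouping idea, with illustrative names rather than the real surrounding loop:

package main

import (
	"fmt"
	"strings"
)

// groupLlamaCPPVariants collapses every llama-cpp-* entry found in the asset
// directory under one "llama-cpp" key, mirroring the aggregation shown above.
func groupLlamaCPPVariants(entries []string) map[string][]string {
	backends := map[string][]string{}
	for _, name := range entries {
		if strings.HasPrefix(name, "llama-cpp-") {
			backends["llama-cpp"] = append(backends["llama-cpp"], name)
			continue
		}
		backends[name] = append(backends[name], name)
	}
	return backends
}

func main() {
	entries := []string{"llama-cpp-avx2", "llama-cpp-cuda", "llama-cpp-grpc", "whisper"}
	// map[llama-cpp:[llama-cpp-avx2 llama-cpp-cuda llama-cpp-grpc] whisper:[whisper]]
	fmt.Println(groupLlamaCPPVariants(entries))
}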
@@ -172,6 +178,7 @@ func selectGRPCProcess(backend, assetDir string) string {
	// Note: This environment variable is read by LocalAI's llama.cpp grpc-server
	if os.Getenv("LLAMACPP_GRPC_SERVERS") != "" {
		log.Info().Msgf("[%s] attempting to load with GRPC variant", LLamaCPPGRPC)
		return backendPath(assetDir, LLamaCPPGRPC)
	}
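When LLAMACPP_GRPC_SERVERS is set, the GRPC variant is returned straight away so work can be distributed to remote llama.cpp workers. As a hedged illustration only, assuming a comma-separated host:port list (the exact format is not stated in this diff), a consumer of the variable could parse it like this:

package main

import (
	"fmt"
	"os"
	"strings"
)

// grpcWorkers splits LLAMACPP_GRPC_SERVERS into individual worker addresses.
// The comma-separated "host:port" format is an assumption for this sketch.
func grpcWorkers() []string {
	v := os.Getenv("LLAMACPP_GRPC_SERVERS")
	if v == "" {
		return nil
	}
	return strings.Split(v, ",")
}

func main() {
	os.Setenv("LLAMACPP_GRPC_SERVERS", "192.168.1.10:50052,192.168.1.11:50052")
	fmt.Println(grpcWorkers()) // [192.168.1.10:50052 192.168.1.11:50052]
}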
@@ -179,11 +186,13 @@ func selectGRPCProcess(backend, assetDir string) string {
	if err == nil {
		for _, gpu := range gpus {
			if strings.Contains(gpu.String(), "nvidia") {
				log.Info().Msgf("[%s] attempting to load with CUDA variant", backend)
				p := backendPath(assetDir, LLamaCPPCUDA)
				if _, err := os.Stat(p); err == nil {
					log.Info().Msgf("[%s] attempting to load with CUDA variant", backend)
					grpcProcess = p
					foundCUDA = true
				} else {
					log.Info().Msgf("GPU device found but no CUDA backend present")
				}
			}
		}
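The change above makes the CUDA variant conditional: it is picked only when an NVIDIA GPU is reported and the llama-cpp-cuda binary actually exists on disk, otherwise a log line explains why the fallback will be used. A minimal sketch of that selection rule, taking GPU names as a plain string slice (the surrounding code enumerates GPUs itself) and assuming the backend-assets/grpc layout:

package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
)

// pickCUDABackend returns the CUDA backend path only if an NVIDIA GPU is
// present and the binary exists; both the layout and gpuNames are assumptions.
func pickCUDABackend(assetDir string, gpuNames []string) (string, bool) {
	for _, g := range gpuNames {
		if strings.Contains(strings.ToLower(g), "nvidia") {
			p := filepath.Join(assetDir, "backend-assets", "grpc", "llama-cpp-cuda")
			if _, err := os.Stat(p); err == nil {
				return p, true
			}
			// GPU found, but no CUDA build shipped with the assets
			return "", false
		}
	}
	return "", false
}

func main() {
	p, ok := pickCUDABackend("/tmp/assets", []string{"NVIDIA GeForce RTX 3090"})
	fmt.Println(p, ok)
}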
@@ -193,10 +202,10 @@ func selectGRPCProcess(backend, assetDir string) string {
		return grpcProcess
	}

	if cpu.X86.HasAVX2 {
	if xsysinfo.HasCPUCaps(cpuid.AVX2) {
		log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
		grpcProcess = backendPath(assetDir, LLamaCPPAVX2)
	} else if cpu.X86.HasAVX {
	} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
		log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
		grpcProcess = backendPath(assetDir, LLamaCPPAVX)
	} else {
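Replacing golang.org/x/sys/cpu with xsysinfo.HasCPUCaps moves the CPU feature probing onto klauspost/cpuid/v2. The sketch below shows what such a helper plausibly wraps; the helper name and behaviour are assumptions about pkg/xsysinfo, while cpuid.CPU.Supports and the AVX/AVX2 feature IDs are the actual cpuid v2 API:

package main

import (
	"fmt"

	"github.com/klauspost/cpuid/v2"
)

// hasCPUCaps reports whether the host CPU supports all of the given features,
// which is what a wrapper like xsysinfo.HasCPUCaps is assumed to do.
func hasCPUCaps(caps ...cpuid.FeatureID) bool {
	return cpuid.CPU.Supports(caps...)
}

func main() {
	fmt.Println("AVX2:", hasCPUCaps(cpuid.AVX2))
	fmt.Println("AVX: ", hasCPUCaps(cpuid.AVX))
}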
@@ -207,8 +216,6 @@ func selectGRPCProcess(backend, assetDir string) string {
	return grpcProcess
}

var autoDetect = os.Getenv("DISABLE_AUTODETECT") != "true"

// starts the grpcModelProcess for the backend, and returns a grpc client
// It also loads the model
func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string) (ModelAddress, error) {