diff --git a/README.md b/README.md
index a10d29d1..175457a6 100644
--- a/README.md
+++ b/README.md
@@ -73,7 +73,7 @@ Note: You might need to convert older models to the new format, see [here](https
 
 A full example on how to run a rwkv model is in the [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv).
 
-Note: rwkv models have an associated tokenizer along that needs to be provided with it:
+Note: rwkv models require the backend `rwkv` to be set in the YAML config file, and an associated tokenizer must be provided alongside the model:
 
 ```
 36464540 -rw-r--r-- 1 mudler mudler 1.2G May 3 10:51 rwkv_small
@@ -545,6 +545,7 @@ name: text-embedding-ada-002
 parameters:
   model: bert
 embeddings: true
+backend: "bert-embeddings"
 ```
 
 There is an example available [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/).
@@ -563,6 +564,7 @@ Download one of the models from https://huggingface.co/ggerganov/whisper.cpp/tre
 
 ```yaml
 name: whisper-1
+backend: whisper
 parameters:
   model: whisper-en
 ```
diff --git a/api/api_test.go b/api/api_test.go
index de9fc34a..639f18d9 100644
--- a/api/api_test.go
+++ b/api/api_test.go
@@ -79,7 +79,7 @@ var _ = Describe("API test", func() {
 		It("returns errors", func() {
 			_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
 			Expect(err).To(HaveOccurred())
-			Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 10 errors occurred:"))
+			Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 9 errors occurred:"))
 		})
 	})
 
diff --git a/api/config.go b/api/config.go
index fea30baf..43051286 100644
--- a/api/config.go
+++ b/api/config.go
@@ -285,5 +285,10 @@ func readConfig(cm ConfigMerger, c *fiber.Ctx, loader *model.ModelLoader, debug
 		}
 	}
 
+	// Enforce debug flag if passed from CLI
+	if debug {
+		config.Debug = true
+	}
+
 	return config, input, nil
 }
diff --git a/api/openai.go b/api/openai.go
index dcd21100..1045507d 100644
--- a/api/openai.go
+++ b/api/openai.go
@@ -12,8 +12,10 @@ import (
 	"path/filepath"
 	"strings"
 
+	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
 	model "github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/whisper"
+	whisperutil "github.com/go-skynet/LocalAI/pkg/whisper"
+	llama "github.com/go-skynet/go-llama.cpp"
 	"github.com/gofiber/fiber/v2"
 	"github.com/rs/zerolog/log"
 	"github.com/valyala/fasthttp"
@@ -436,12 +438,14 @@ func transcriptEndpoint(cm ConfigMerger, debug bool, loader *model.ModelLoader,
 
 	log.Debug().Msgf("Audio file copied to: %+v", dst)
 
-	whisperModel, err := loader.WhisperLoader("whisper", config.Model)
+	whisperModel, err := loader.BackendLoader("whisper", config.Model, []llama.ModelOption{}, uint32(config.Threads))
 	if err != nil {
 		return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
 	}
 
-	tr, err := whisper.Transcript(whisperModel, dst, input.Language)
+	w := whisperModel.(whisper.Model)
+
+	tr, err := whisperutil.Transcript(w, dst, input.Language)
 	if err != nil {
 		return c.Status(http.StatusBadRequest).JSON(fiber.Map{"error": err.Error()})
 	}
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
new file mode 100644
index 00000000..9e48ae4b
--- /dev/null
+++ b/pkg/model/initializers.go
@@ -0,0 +1,158 @@
+package model
+
+import (
+	"fmt"
+	"strings"
+
+	rwkv "github.com/donomii/go-rwkv.cpp"
+	whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
+	bloomz "github.com/go-skynet/bloomz.cpp"
+	bert "github.com/go-skynet/go-bert.cpp"
+	gpt2 "github.com/go-skynet/go-gpt2.cpp"
+	llama "github.com/go-skynet/go-llama.cpp"
+	"github.com/hashicorp/go-multierror"
+	gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang"
+	"github.com/rs/zerolog/log"
+)
+
+const tokenizerSuffix = ".tokenizer.json"
+
+const (
+	LlamaBackend          = "llama"
+	BloomzBackend         = "bloomz"
+	StableLMBackend       = "stablelm"
+	DollyBackend          = "dolly"
+	RedPajamaBackend      = "redpajama"
+	Gpt2Backend           = "gpt2"
+	Gpt4AllLlamaBackend   = "gpt4all-llama"
+	Gpt4AllMptBackend     = "gpt4all-mpt"
+	Gpt4AllJBackend       = "gpt4all-j"
+	BertEmbeddingsBackend = "bert-embeddings"
+	RwkvBackend           = "rwkv"
+	WhisperBackend        = "whisper"
+)
+
+var backends []string = []string{
+	LlamaBackend,
+	Gpt4AllLlamaBackend,
+	Gpt4AllMptBackend,
+	Gpt4AllJBackend,
+	Gpt2Backend,
+	WhisperBackend,
+	RwkvBackend,
+	BloomzBackend,
+	StableLMBackend,
+	DollyBackend,
+	RedPajamaBackend,
+	BertEmbeddingsBackend,
+}
+
+var redPajama = func(modelFile string) (interface{}, error) {
+	return gpt2.NewRedPajama(modelFile)
+}
+
+var dolly = func(modelFile string) (interface{}, error) {
+	return gpt2.NewDolly(modelFile)
+}
+
+var stableLM = func(modelFile string) (interface{}, error) {
+	return gpt2.NewStableLM(modelFile)
+}
+
+var bertEmbeddings = func(modelFile string) (interface{}, error) {
+	return bert.New(modelFile)
+}
+
+var bloomzLM = func(modelFile string) (interface{}, error) {
+	return bloomz.New(modelFile)
+}
+var gpt2LM = func(modelFile string) (interface{}, error) {
+	return gpt2.New(modelFile)
+}
+
+var whisperModel = func(modelFile string) (interface{}, error) {
+	return whisper.New(modelFile)
+}
+
+func llamaLM(opts ...llama.ModelOption) func(string) (interface{}, error) {
+	return func(s string) (interface{}, error) {
+		return llama.New(s, opts...)
+	}
+}
+
+func rwkvLM(tokenFile string, threads uint32) func(string) (interface{}, error) {
+	return func(s string) (interface{}, error) {
+		model := rwkv.LoadFiles(s, tokenFile, threads)
+		if model == nil {
+			return nil, fmt.Errorf("could not load model")
+		}
+		return model, nil
+	}
+}
+
+func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
+	switch strings.ToLower(backendString) {
+	case LlamaBackend:
+		return ml.LoadModel(modelFile, llamaLM(llamaOpts...))
+	case BloomzBackend:
+		return ml.LoadModel(modelFile, bloomzLM)
+	case StableLMBackend:
+		return ml.LoadModel(modelFile, stableLM)
+	case DollyBackend:
+		return ml.LoadModel(modelFile, dolly)
+	case RedPajamaBackend:
+		return ml.LoadModel(modelFile, redPajama)
+	case Gpt2Backend:
+		return ml.LoadModel(modelFile, gpt2LM)
+	case Gpt4AllLlamaBackend:
+		return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType)))
+	case Gpt4AllMptBackend:
+		return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.MPTType)))
+	case Gpt4AllJBackend:
+		return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.GPTJType)))
+	case BertEmbeddingsBackend:
+		return ml.LoadModel(modelFile, bertEmbeddings)
+	case RwkvBackend:
+		return ml.LoadModel(modelFile, rwkvLM(modelFile+tokenizerSuffix, threads))
+	case WhisperBackend:
+		return ml.LoadModel(modelFile, whisperModel)
+	default:
+		return nil, fmt.Errorf("backend unsupported: %s", backendString)
+	}
+}
+
+func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32) (interface{}, error) {
+	log.Debug().Msgf("Loading models greedily")
+
+	ml.mu.Lock()
+	m, exists := ml.models[modelFile]
+	if exists {
+		ml.mu.Unlock()
+		return m, nil
+	}
+	ml.mu.Unlock()
+	var err error
+
+	for _, b := range backends {
+		if b == BloomzBackend || b == WhisperBackend || b == RwkvBackend { // do not autoload bloomz/whisper/rwkv
+			continue
+		}
+		log.Debug().Msgf("[%s] Attempting to load", b)
+		model, modelerr := ml.BackendLoader(b, modelFile, llamaOpts, threads)
+		if modelerr == nil && model != nil {
+			log.Debug().Msgf("[%s] Loads OK", b)
+			return model, nil
+		} else if modelerr != nil {
+			err = multierror.Append(err, modelerr)
+			log.Debug().Msgf("[%s] Fails: %s", b, modelerr.Error())
+		}
+	}
+
+	return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
+}
diff --git a/pkg/model/loader.go b/pkg/model/loader.go
index fe82b863..ddc7b6eb 100644
--- a/pkg/model/loader.go
+++ b/pkg/model/loader.go
@@ -10,14 +10,6 @@ import (
 	"sync"
 	"text/template"
 
-	rwkv "github.com/donomii/go-rwkv.cpp"
-	whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
-	bloomz "github.com/go-skynet/bloomz.cpp"
-	bert "github.com/go-skynet/go-bert.cpp"
-	gpt2 "github.com/go-skynet/go-gpt2.cpp"
-	llama "github.com/go-skynet/go-llama.cpp"
-	"github.com/hashicorp/go-multierror"
-	gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang"
 	"github.com/rs/zerolog/log"
 )
 
@@ -25,33 +17,15 @@ type ModelLoader struct {
 	ModelPath string
 	mu        sync.Mutex
 	// TODO: this needs generics
-	models            map[string]*llama.LLama
-	gptmodels         map[string]*gpt4all.Model
-	gpt2models        map[string]*gpt2.GPT2
-	gptstablelmmodels map[string]*gpt2.StableLM
-	dollymodels       map[string]*gpt2.Dolly
-	redpajama         map[string]*gpt2.RedPajama
-	rwkv              map[string]*rwkv.RwkvState
-	bloomz            map[string]*bloomz.Bloomz
-	bert              map[string]*bert.Bert
-	promptsTemplates  map[string]*template.Template
-	whisperModels     map[string]whisper.Model
+	models           map[string]interface{}
+	promptsTemplates map[string]*template.Template
 }
 
 func NewModelLoader(modelPath string) *ModelLoader {
 	return &ModelLoader{
-		ModelPath:         modelPath,
-		gpt2models:        make(map[string]*gpt2.GPT2),
-		gptmodels:         make(map[string]*gpt4all.Model),
-		gptstablelmmodels: make(map[string]*gpt2.StableLM),
-		dollymodels:       make(map[string]*gpt2.Dolly),
-		redpajama:         make(map[string]*gpt2.RedPajama),
-		models:            make(map[string]*llama.LLama),
-		rwkv:              make(map[string]*rwkv.RwkvState),
-		bloomz:            make(map[string]*bloomz.Bloomz),
-		bert:              make(map[string]*bert.Bert),
-		promptsTemplates:  make(map[string]*template.Template),
-		whisperModels:     make(map[string]whisper.Model),
+		ModelPath:        modelPath,
+		models:           make(map[string]interface{}),
+		promptsTemplates: make(map[string]*template.Template),
 	}
 }
 
@@ -136,271 +110,11 @@ func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
 	return nil
 }
 
-func (ml *ModelLoader) LoadRedPajama(modelName string) (*gpt2.RedPajama, error) {
+func (ml *ModelLoader) LoadModel(modelName string, loader func(string) (interface{}, error)) (interface{}, error) {
 	ml.mu.Lock()
 	defer ml.mu.Unlock()
 
 	// Check if we already have a loaded model
-	if !ml.ExistsInModelPath(modelName) {
-		return nil, fmt.Errorf("model does not exist")
-	}
-
-	if m, ok := ml.redpajama[modelName]; ok {
-		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
-		return m, nil
-	}
-
-	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.ModelPath, modelName)
-	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
-
-	model, err := gpt2.NewRedPajama(modelFile)
-	if err != nil {
-		return nil, err
-	}
-
-	// If there is a prompt template, load it
-	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
-		return nil, err
-	}
-
-	ml.redpajama[modelName] = model
-	return model, err
-}
-
-func (ml *ModelLoader) LoadDollyModel(modelName string) (*gpt2.Dolly, error) {
-	ml.mu.Lock()
-	defer ml.mu.Unlock()
-
-	// Check if we already have a loaded model
-	if !ml.ExistsInModelPath(modelName) {
-		return nil, fmt.Errorf("model does not exist")
-	}
-
-	if m, ok := ml.dollymodels[modelName]; ok {
-		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
-		return m, nil
-	}
-
-	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.ModelPath, modelName)
-	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
-
-	model, err := gpt2.NewDolly(modelFile)
-	if err != nil {
-		return nil, err
-	}
-
-	// If there is a prompt template, load it
-	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
-		return nil, err
-	}
-
-	ml.dollymodels[modelName] = model
-	return model, err
-}
-
-func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, error) {
-	ml.mu.Lock()
-	defer ml.mu.Unlock()
-
-	// Check if we already have a loaded model
-	if !ml.ExistsInModelPath(modelName) {
-		return nil, fmt.Errorf("model does not exist")
-	}
-
-	if m, ok := ml.gptstablelmmodels[modelName]; ok {
-		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
-		return m, nil
-	}
-
-	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.ModelPath, modelName)
-	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
-
-	model, err := gpt2.NewStableLM(modelFile)
-	if err != nil {
-		return nil, err
-	}
-
-	// If there is a prompt template, load it
-	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
-		return nil, err
-	}
-
-	ml.gptstablelmmodels[modelName] = model
-	return model, err
-}
-
-func (ml *ModelLoader) LoadBERT(modelName string) (*bert.Bert, error) {
-	ml.mu.Lock()
-	defer ml.mu.Unlock()
-
-	// Check if we already have a loaded model
-	if !ml.ExistsInModelPath(modelName) {
-		return nil, fmt.Errorf("model does not exist")
-	}
-
-	if m, ok := ml.bert[modelName]; ok {
-		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
-		return m, nil
-	}
-
-	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.ModelPath, modelName)
-	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
-
-	model, err := bert.New(modelFile)
-	if err != nil {
-		return nil, err
-	}
-
-	// If there is a prompt template, load it
-	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
-		return nil, err
-	}
-
-	ml.bert[modelName] = model
-	return model, err
-}
-
-func (ml *ModelLoader) LoadBloomz(modelName string) (*bloomz.Bloomz, error) {
-	ml.mu.Lock()
-	defer ml.mu.Unlock()
-
-	// Check if we already have a loaded model
-	if !ml.ExistsInModelPath(modelName) {
-		return nil, fmt.Errorf("model does not exist")
-	}
-
-	if m, ok := ml.bloomz[modelName]; ok {
-		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
-		return m, nil
-	}
-
-	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.ModelPath, modelName)
-	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
-
-	model, err := bloomz.New(modelFile)
-	if err != nil {
-		return nil, err
-	}
-
-	// If there is a prompt template, load it
-	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
-		return nil, err
-	}
-
-	ml.bloomz[modelName] = model
-	return model, err
-}
-
-func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
-	ml.mu.Lock()
-	defer ml.mu.Unlock()
-
-	// Check if we already have a loaded model
-	if !ml.ExistsInModelPath(modelName) {
-		return nil, fmt.Errorf("model does not exist")
-	}
-
-	if m, ok := ml.gpt2models[modelName]; ok {
-		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
-		return m, nil
-	}
-
-	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.ModelPath, modelName)
-	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
-
-	model, err := gpt2.New(modelFile)
-	if err != nil {
-		return nil, err
-	}
-
-	// If there is a prompt template, load it
-	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
-		return nil, err
-	}
-
-	ml.gpt2models[modelName] = model
-	return model, err
-}
-
-func (ml *ModelLoader) LoadGPT4AllModel(modelName string, opts ...gpt4all.ModelOption) (*gpt4all.Model, error) {
-	ml.mu.Lock()
-	defer ml.mu.Unlock()
-
-	// Check if we already have a loaded model
-	if !ml.ExistsInModelPath(modelName) {
-		return nil, fmt.Errorf("model does not exist")
-	}
-
-	if m, ok := ml.gptmodels[modelName]; ok {
-		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
-		return m, nil
-	}
-
-	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.ModelPath, modelName)
-	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
-
-	model, err := gpt4all.New(modelFile, opts...)
-	if err != nil {
-		return nil, err
-	}
-
-	// If there is a prompt template, load it
-	if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
-		return nil, err
-	}
-
-	ml.gptmodels[modelName] = model
-	return model, err
-}
-
-func (ml *ModelLoader) LoadRWKV(modelName, tokenFile string, threads uint32) (*rwkv.RwkvState, error) {
-	ml.mu.Lock()
-	defer ml.mu.Unlock()
-
-	log.Debug().Msgf("Loading model name: %s", modelName)
-
-	// Check if we already have a loaded model
-	if !ml.ExistsInModelPath(modelName) {
-		return nil, fmt.Errorf("model does not exist")
-	}
-
-	if m, ok := ml.rwkv[modelName]; ok {
-		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
-		return m, nil
-	}
-
-	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.ModelPath, modelName)
-	tokenPath := filepath.Join(ml.ModelPath, tokenFile)
-	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
-
-	model := rwkv.LoadFiles(modelFile, tokenPath, threads)
-	if model == nil {
-		return nil, fmt.Errorf("could not load model")
-	}
-
-	ml.rwkv[modelName] = model
-	return model, nil
-}
-
-func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOption) (*llama.LLama, error) {
-	ml.mu.Lock()
-	defer ml.mu.Unlock()
-
-	log.Debug().Msgf("Loading model name: %s", modelName)
-
-	// Check if we already have a loaded model
-	if !ml.ExistsInModelPath(modelName) {
-		return nil, fmt.Errorf("model does not exist")
-	}
-
 	if m, ok := ml.models[modelName]; ok {
 		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
 		return m, nil
@@ -410,7 +124,7 @@ func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOptio
 	modelFile := filepath.Join(ml.ModelPath, modelName)
 	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
 
-	model, err := llama.New(modelFile, opts...)
+	model, err := loader(modelFile)
 	if err != nil {
 		return nil, err
 	}
@@ -421,182 +135,5 @@ func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOptio
 	}
 
 	ml.models[modelName] = model
-	return model, err
-}
-
-func (ml *ModelLoader) LoadWhisperModel(modelName string) (whisper.Model, error) {
-	ml.mu.Lock()
-	defer ml.mu.Unlock()
-
-	// Check if we already have a loaded model
-	if !ml.ExistsInModelPath(modelName) {
-		return nil, fmt.Errorf("model does not exist -- %s", modelName)
-	}
-
-	if m, ok := ml.whisperModels[modelName]; ok {
-		log.Debug().Msgf("Model already loaded in memory: %s", modelName)
-		return m, nil
-	}
-
-	// Load the model and keep it in memory for later use
-	modelFile := filepath.Join(ml.ModelPath, modelName)
-	log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
-
-	model, err := whisper.New(modelFile)
-	if err != nil {
-		return nil, err
-	}
-
-	ml.whisperModels[modelName] = model
-	return model, err
-}
-
-const tokenizerSuffix = ".tokenizer.json"
-
-var loadedModels map[string]interface{} = map[string]interface{}{}
-var muModels sync.Mutex
-
-func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
-	switch strings.ToLower(backendString) {
-	case "llama":
-		return ml.LoadLLaMAModel(modelFile, llamaOpts...)
-	case "bloomz":
-		return ml.LoadBloomz(modelFile)
-	case "stablelm":
-		return ml.LoadStableLMModel(modelFile)
-	case "dolly":
-		return ml.LoadDollyModel(modelFile)
-	case "redpajama":
-		return ml.LoadRedPajama(modelFile)
-	case "gpt2":
-		return ml.LoadGPT2Model(modelFile)
-	case "gpt4all-llama":
-		return ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType))
-	case "gpt4all-mpt":
-		return ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.MPTType))
-	case "gpt4all-j":
-		return ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.GPTJType))
-	case "bert-embeddings":
-		return ml.LoadBERT(modelFile)
-	case "rwkv":
-		return ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
-	default:
-		return nil, fmt.Errorf("backend unsupported: %s", backendString)
-	}
-}
-
-func (ml *ModelLoader) WhisperLoader(backendString string, modelFile string) (model whisper.Model, err error) {
-	//TODO expose more whisper options in next PR
-	switch strings.ToLower(backendString) {
-	case "whisper":
-		return ml.LoadWhisperModel(modelFile)
-	default:
-		return nil, fmt.Errorf("whisper backend unsupported: %s", backendString)
-	}
-}
-
-func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
-	updateModels := func(model interface{}) {
-		muModels.Lock()
-		defer muModels.Unlock()
-		loadedModels[modelFile] = model
-	}
-
-	muModels.Lock()
-	m, exists := loadedModels[modelFile]
-	if exists {
-		muModels.Unlock()
-		return m, nil
-	}
-	muModels.Unlock()
-
-	model, modelerr := ml.LoadLLaMAModel(modelFile, llamaOpts...)
-	if modelerr == nil {
-		updateModels(model)
-		return model, nil
-	} else {
-		err = multierror.Append(err, modelerr)
-	}
-
-	model, modelerr = ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.GPTJType))
-	if modelerr == nil {
-		updateModels(model)
-		return model, nil
-	} else {
-		err = multierror.Append(err, modelerr)
-	}
-
-	model, modelerr = ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType))
-	if modelerr == nil {
-		updateModels(model)
-		return model, nil
-	} else {
-		err = multierror.Append(err, modelerr)
-	}
-
-	model, modelerr = ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.MPTType))
-	if modelerr == nil {
-		updateModels(model)
-		return model, nil
-	} else {
-		err = multierror.Append(err, modelerr)
-	}
-
-	model, modelerr = ml.LoadGPT2Model(modelFile)
-	if modelerr == nil {
-		updateModels(model)
-		return model, nil
-	} else {
-		err = multierror.Append(err, modelerr)
-	}
-
-	model, modelerr = ml.LoadStableLMModel(modelFile)
-	if modelerr == nil {
-		updateModels(model)
-		return model, nil
-	} else {
-		err = multierror.Append(err, modelerr)
-	}
-
-	model, modelerr = ml.LoadDollyModel(modelFile)
-	if modelerr == nil {
-		updateModels(model)
-		return model, nil
-	} else {
-		err = multierror.Append(err, modelerr)
-	}
-
-	model, modelerr = ml.LoadRedPajama(modelFile)
-	if modelerr == nil {
-		updateModels(model)
-		return model, nil
-	} else {
-		err = multierror.Append(err, modelerr)
-	}
-
-	// Do not autoload bloomz
-	//model, modelerr = ml.LoadBloomz(modelFile)
-	//if modelerr == nil {
-	//	updateModels(model)
-	//	return model, nil
-	//} else {
-	//	err = multierror.Append(err, modelerr)
-	//}
-
-	model, modelerr = ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
-	if modelerr == nil {
-		updateModels(model)
-		return model, nil
-	} else {
-		err = multierror.Append(err, modelerr)
-	}
-
-	model, modelerr = ml.LoadBERT(modelFile)
-	if modelerr == nil {
-		updateModels(model)
-		return model, nil
-	} else {
-		err = multierror.Append(err, modelerr)
-	}
-
-	return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
+	return model, nil
 }
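
For reference, a minimal sketch of how the consolidated loader API introduced above in `pkg/model/initializers.go` might be called; it is not part of the patch, and the `./models` directory and the `ggml-model.bin` file name are assumptions for illustration. Only `NewModelLoader`, `BackendLoader`, `GreedyLoader`, and the exported backend constants come from the diff itself:

```go
package main

import (
	"fmt"
	"log"

	model "github.com/go-skynet/LocalAI/pkg/model"
	llama "github.com/go-skynet/go-llama.cpp"
)

func main() {
	// Assumed model directory and file name, for illustration only.
	loader := model.NewModelLoader("./models")

	// Ask for a specific backend explicitly (here: llama)...
	m, err := loader.BackendLoader(model.LlamaBackend, "ggml-model.bin", []llama.ModelOption{}, 4)
	if err != nil {
		// ...or fall back to GreedyLoader, which tries the auto-loadable backends in order.
		m, err = loader.GreedyLoader("ggml-model.bin", []llama.ModelOption{}, 4)
	}
	if err != nil {
		log.Fatalf("load failed: %v", err)
	}

	fmt.Printf("loaded model of type %T\n", m)
}
```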