2023-04-11 22:02:39 +00:00
|
|
|
package model
|
2023-04-07 09:30:59 +00:00
|
|
|
|
|
|
|
import (
|
2023-04-08 08:46:51 +00:00
|
|
|
"bytes"
|
2023-04-07 09:30:59 +00:00
|
|
|
"fmt"
|
2023-04-10 10:02:40 +00:00
|
|
|
"io/ioutil"
|
2023-04-07 09:30:59 +00:00
|
|
|
"os"
|
|
|
|
"path/filepath"
|
2023-04-10 10:02:40 +00:00
|
|
|
"strings"
|
2023-04-07 09:30:59 +00:00
|
|
|
"sync"
|
2023-04-08 08:46:51 +00:00
|
|
|
"text/template"
|
2023-04-07 09:30:59 +00:00
|
|
|
|
2023-05-03 09:45:22 +00:00
|
|
|
rwkv "github.com/donomii/go-rwkv.cpp"
|
2023-05-11 12:05:07 +00:00
|
|
|
whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
2023-05-10 23:12:58 +00:00
|
|
|
bloomz "github.com/go-skynet/bloomz.cpp"
|
2023-05-10 13:20:21 +00:00
|
|
|
bert "github.com/go-skynet/go-bert.cpp"
|
2023-04-20 17:33:36 +00:00
|
|
|
gpt2 "github.com/go-skynet/go-gpt2.cpp"
|
2023-04-07 09:30:59 +00:00
|
|
|
llama "github.com/go-skynet/go-llama.cpp"
|
2023-05-10 23:12:58 +00:00
|
|
|
"github.com/hashicorp/go-multierror"
|
2023-05-11 12:31:19 +00:00
|
|
|
gpt4all "github.com/nomic/gpt4all/gpt4all-bindings/golang"
|
2023-05-10 23:12:58 +00:00
|
|
|
"github.com/rs/zerolog/log"
|
2023-04-07 09:30:59 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
type ModelLoader struct {
|
2023-04-27 04:18:18 +00:00
|
|
|
ModelPath string
|
2023-04-20 17:33:36 +00:00
|
|
|
mu sync.Mutex
|
2023-05-10 13:20:21 +00:00
|
|
|
// TODO: this needs generics
|
2023-04-20 22:06:55 +00:00
|
|
|
models map[string]*llama.LLama
|
2023-05-11 12:31:19 +00:00
|
|
|
gptmodels map[string]*gpt4all.Model
|
2023-04-20 22:06:55 +00:00
|
|
|
gpt2models map[string]*gpt2.GPT2
|
|
|
|
gptstablelmmodels map[string]*gpt2.StableLM
|
2023-05-10 23:12:58 +00:00
|
|
|
dollymodels map[string]*gpt2.Dolly
|
|
|
|
redpajama map[string]*gpt2.RedPajama
|
2023-05-03 09:45:22 +00:00
|
|
|
rwkv map[string]*rwkv.RwkvState
|
2023-05-10 23:12:58 +00:00
|
|
|
bloomz map[string]*bloomz.Bloomz
|
2023-05-11 12:05:07 +00:00
|
|
|
bert map[string]*bert.Bert
|
|
|
|
promptsTemplates map[string]*template.Template
|
|
|
|
whisperModels map[string]whisper.Model
|
2023-04-07 09:30:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func NewModelLoader(modelPath string) *ModelLoader {
|
2023-04-20 22:06:55 +00:00
|
|
|
return &ModelLoader{
|
2023-04-27 04:18:18 +00:00
|
|
|
ModelPath: modelPath,
|
2023-04-20 22:06:55 +00:00
|
|
|
gpt2models: make(map[string]*gpt2.GPT2),
|
2023-05-11 12:31:19 +00:00
|
|
|
gptmodels: make(map[string]*gpt4all.Model),
|
2023-04-20 22:06:55 +00:00
|
|
|
gptstablelmmodels: make(map[string]*gpt2.StableLM),
|
2023-05-10 23:12:58 +00:00
|
|
|
dollymodels: make(map[string]*gpt2.Dolly),
|
|
|
|
redpajama: make(map[string]*gpt2.RedPajama),
|
2023-04-20 22:06:55 +00:00
|
|
|
models: make(map[string]*llama.LLama),
|
2023-05-03 09:45:22 +00:00
|
|
|
rwkv: make(map[string]*rwkv.RwkvState),
|
2023-05-10 23:12:58 +00:00
|
|
|
bloomz: make(map[string]*bloomz.Bloomz),
|
2023-05-10 13:20:21 +00:00
|
|
|
bert: make(map[string]*bert.Bert),
|
2023-04-20 22:06:55 +00:00
|
|
|
promptsTemplates: make(map[string]*template.Template),
|
2023-05-11 12:05:07 +00:00
|
|
|
whisperModels: make(map[string]whisper.Model),
|
2023-04-20 22:06:55 +00:00
|
|
|
}
|
2023-04-07 09:30:59 +00:00
|
|
|
}
|
|
|
|
|
2023-04-20 16:33:02 +00:00
|
|
|
func (ml *ModelLoader) ExistsInModelPath(s string) bool {
|
2023-04-27 04:18:18 +00:00
|
|
|
_, err := os.Stat(filepath.Join(ml.ModelPath, s))
|
2023-04-20 16:33:02 +00:00
|
|
|
return err == nil
|
|
|
|
}
|
|
|
|
|
2023-04-10 10:02:40 +00:00
|
|
|
func (ml *ModelLoader) ListModels() ([]string, error) {
|
2023-04-27 04:18:18 +00:00
|
|
|
files, err := ioutil.ReadDir(ml.ModelPath)
|
2023-04-10 10:02:40 +00:00
|
|
|
if err != nil {
|
|
|
|
return []string{}, err
|
|
|
|
}
|
|
|
|
|
|
|
|
models := []string{}
|
|
|
|
for _, file := range files {
|
2023-04-20 16:33:02 +00:00
|
|
|
// Skip templates, YAML and .keep files
|
|
|
|
if strings.HasSuffix(file.Name(), ".tmpl") || strings.HasSuffix(file.Name(), ".keep") || strings.HasSuffix(file.Name(), ".yaml") || strings.HasSuffix(file.Name(), ".yml") {
|
|
|
|
continue
|
2023-04-10 10:02:40 +00:00
|
|
|
}
|
2023-04-20 16:33:02 +00:00
|
|
|
|
|
|
|
models = append(models, file.Name())
|
2023-04-10 10:02:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return models, nil
|
|
|
|
}
|
|
|
|
|
2023-04-08 08:46:51 +00:00
|
|
|
func (ml *ModelLoader) TemplatePrefix(modelName string, in interface{}) (string, error) {
|
|
|
|
ml.mu.Lock()
|
|
|
|
defer ml.mu.Unlock()
|
|
|
|
|
|
|
|
m, ok := ml.promptsTemplates[modelName]
|
|
|
|
if !ok {
|
2023-04-27 04:18:18 +00:00
|
|
|
modelFile := filepath.Join(ml.ModelPath, modelName)
|
|
|
|
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
t, exists := ml.promptsTemplates[modelName]
|
|
|
|
if exists {
|
|
|
|
m = t
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if m == nil {
|
2023-05-04 17:49:43 +00:00
|
|
|
return "", fmt.Errorf("failed loading any template")
|
2023-04-08 08:46:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
var buf bytes.Buffer
|
|
|
|
|
|
|
|
if err := m.Execute(&buf, in); err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
return buf.String(), nil
|
|
|
|
}
|
|
|
|
|
2023-04-20 16:33:02 +00:00
|
|
|
func (ml *ModelLoader) loadTemplateIfExists(modelName, modelFile string) error {
|
|
|
|
// Check if the template was already loaded
|
|
|
|
if _, ok := ml.promptsTemplates[modelName]; ok {
|
|
|
|
return nil
|
|
|
|
}
|
2023-04-19 15:10:29 +00:00
|
|
|
|
|
|
|
// Check if the model path exists
|
2023-04-27 04:18:18 +00:00
|
|
|
// skip any error here - we run anyway if a template does not exist
|
2023-04-20 16:33:02 +00:00
|
|
|
modelTemplateFile := fmt.Sprintf("%s.tmpl", modelName)
|
|
|
|
|
|
|
|
if !ml.ExistsInModelPath(modelTemplateFile) {
|
2023-04-19 15:10:29 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-04-27 04:18:18 +00:00
|
|
|
dat, err := os.ReadFile(filepath.Join(ml.ModelPath, modelTemplateFile))
|
2023-04-19 15:10:29 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Parse the template
|
|
|
|
tmpl, err := template.New("prompt").Parse(string(dat))
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
ml.promptsTemplates[modelName] = tmpl
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-05-10 23:12:58 +00:00
|
|
|
func (ml *ModelLoader) LoadRedPajama(modelName string) (*gpt2.RedPajama, error) {
|
|
|
|
ml.mu.Lock()
|
|
|
|
defer ml.mu.Unlock()
|
|
|
|
|
|
|
|
// Check if we already have a loaded model
|
|
|
|
if !ml.ExistsInModelPath(modelName) {
|
|
|
|
return nil, fmt.Errorf("model does not exist")
|
|
|
|
}
|
|
|
|
|
|
|
|
if m, ok := ml.redpajama[modelName]; ok {
|
|
|
|
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
|
|
|
return m, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Load the model and keep it in memory for later use
|
|
|
|
modelFile := filepath.Join(ml.ModelPath, modelName)
|
|
|
|
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
|
|
|
|
|
|
|
model, err := gpt2.NewRedPajama(modelFile)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// If there is a prompt template, load it
|
|
|
|
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
ml.redpajama[modelName] = model
|
|
|
|
return model, err
|
|
|
|
}
|
|
|
|
|
|
|
|
func (ml *ModelLoader) LoadDollyModel(modelName string) (*gpt2.Dolly, error) {
|
|
|
|
ml.mu.Lock()
|
|
|
|
defer ml.mu.Unlock()
|
|
|
|
|
|
|
|
// Check if we already have a loaded model
|
|
|
|
if !ml.ExistsInModelPath(modelName) {
|
|
|
|
return nil, fmt.Errorf("model does not exist")
|
|
|
|
}
|
|
|
|
|
|
|
|
if m, ok := ml.dollymodels[modelName]; ok {
|
|
|
|
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
|
|
|
return m, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Load the model and keep it in memory for later use
|
|
|
|
modelFile := filepath.Join(ml.ModelPath, modelName)
|
|
|
|
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
|
|
|
|
|
|
|
model, err := gpt2.NewDolly(modelFile)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// If there is a prompt template, load it
|
|
|
|
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
ml.dollymodels[modelName] = model
|
|
|
|
return model, err
|
|
|
|
}
|
|
|
|
|
2023-04-20 22:06:55 +00:00
|
|
|
func (ml *ModelLoader) LoadStableLMModel(modelName string) (*gpt2.StableLM, error) {
|
|
|
|
ml.mu.Lock()
|
|
|
|
defer ml.mu.Unlock()
|
|
|
|
|
|
|
|
// Check if we already have a loaded model
|
|
|
|
if !ml.ExistsInModelPath(modelName) {
|
|
|
|
return nil, fmt.Errorf("model does not exist")
|
|
|
|
}
|
|
|
|
|
|
|
|
if m, ok := ml.gptstablelmmodels[modelName]; ok {
|
|
|
|
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
|
|
|
return m, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Load the model and keep it in memory for later use
|
2023-04-27 04:18:18 +00:00
|
|
|
modelFile := filepath.Join(ml.ModelPath, modelName)
|
2023-04-20 22:06:55 +00:00
|
|
|
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
|
|
|
|
|
|
|
model, err := gpt2.NewStableLM(modelFile)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// If there is a prompt template, load it
|
|
|
|
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
ml.gptstablelmmodels[modelName] = model
|
|
|
|
return model, err
|
|
|
|
}
|
|
|
|
|
2023-05-10 13:20:21 +00:00
|
|
|
func (ml *ModelLoader) LoadBERT(modelName string) (*bert.Bert, error) {
|
|
|
|
ml.mu.Lock()
|
|
|
|
defer ml.mu.Unlock()
|
|
|
|
|
|
|
|
// Check if we already have a loaded model
|
|
|
|
if !ml.ExistsInModelPath(modelName) {
|
|
|
|
return nil, fmt.Errorf("model does not exist")
|
|
|
|
}
|
|
|
|
|
|
|
|
if m, ok := ml.bert[modelName]; ok {
|
|
|
|
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
|
|
|
return m, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Load the model and keep it in memory for later use
|
|
|
|
modelFile := filepath.Join(ml.ModelPath, modelName)
|
|
|
|
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
|
|
|
|
|
|
|
model, err := bert.New(modelFile)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// If there is a prompt template, load it
|
|
|
|
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
ml.bert[modelName] = model
|
|
|
|
return model, err
|
|
|
|
}
|
|
|
|
|
2023-05-10 23:12:58 +00:00
|
|
|
func (ml *ModelLoader) LoadBloomz(modelName string) (*bloomz.Bloomz, error) {
|
|
|
|
ml.mu.Lock()
|
|
|
|
defer ml.mu.Unlock()
|
|
|
|
|
|
|
|
// Check if we already have a loaded model
|
|
|
|
if !ml.ExistsInModelPath(modelName) {
|
|
|
|
return nil, fmt.Errorf("model does not exist")
|
|
|
|
}
|
|
|
|
|
|
|
|
if m, ok := ml.bloomz[modelName]; ok {
|
|
|
|
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
|
|
|
return m, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Load the model and keep it in memory for later use
|
|
|
|
modelFile := filepath.Join(ml.ModelPath, modelName)
|
|
|
|
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
|
|
|
|
|
|
|
model, err := bloomz.New(modelFile)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// If there is a prompt template, load it
|
|
|
|
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
ml.bloomz[modelName] = model
|
|
|
|
return model, err
|
|
|
|
}
|
|
|
|
|
2023-04-20 17:33:36 +00:00
|
|
|
func (ml *ModelLoader) LoadGPT2Model(modelName string) (*gpt2.GPT2, error) {
|
|
|
|
ml.mu.Lock()
|
|
|
|
defer ml.mu.Unlock()
|
|
|
|
|
|
|
|
// Check if we already have a loaded model
|
|
|
|
if !ml.ExistsInModelPath(modelName) {
|
|
|
|
return nil, fmt.Errorf("model does not exist")
|
|
|
|
}
|
|
|
|
|
|
|
|
if m, ok := ml.gpt2models[modelName]; ok {
|
|
|
|
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
|
|
|
return m, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Load the model and keep it in memory for later use
|
2023-04-27 04:18:18 +00:00
|
|
|
modelFile := filepath.Join(ml.ModelPath, modelName)
|
2023-04-20 17:33:36 +00:00
|
|
|
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
|
|
|
|
|
|
|
model, err := gpt2.New(modelFile)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// If there is a prompt template, load it
|
|
|
|
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
ml.gpt2models[modelName] = model
|
|
|
|
return model, err
|
|
|
|
}
|
|
|
|
|
2023-05-11 12:31:19 +00:00
|
|
|
func (ml *ModelLoader) LoadGPT4AllModel(modelName string, opts ...gpt4all.ModelOption) (*gpt4all.Model, error) {
|
2023-04-07 09:30:59 +00:00
|
|
|
ml.mu.Lock()
|
|
|
|
defer ml.mu.Unlock()
|
|
|
|
|
|
|
|
// Check if we already have a loaded model
|
2023-04-20 16:33:02 +00:00
|
|
|
if !ml.ExistsInModelPath(modelName) {
|
|
|
|
return nil, fmt.Errorf("model does not exist")
|
2023-04-07 09:30:59 +00:00
|
|
|
}
|
|
|
|
|
2023-04-20 16:33:02 +00:00
|
|
|
if m, ok := ml.gptmodels[modelName]; ok {
|
|
|
|
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
|
|
|
return m, nil
|
2023-04-07 09:30:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Load the model and keep it in memory for later use
|
2023-04-27 04:18:18 +00:00
|
|
|
modelFile := filepath.Join(ml.ModelPath, modelName)
|
2023-04-20 16:33:02 +00:00
|
|
|
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
|
|
|
|
2023-05-11 12:31:19 +00:00
|
|
|
model, err := gpt4all.New(modelFile, opts...)
|
2023-04-07 09:30:59 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2023-04-08 08:46:51 +00:00
|
|
|
// If there is a prompt template, load it
|
2023-04-20 16:33:02 +00:00
|
|
|
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
2023-04-19 15:10:29 +00:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2023-04-20 16:33:02 +00:00
|
|
|
ml.gptmodels[modelName] = model
|
2023-04-19 15:10:29 +00:00
|
|
|
return model, err
|
|
|
|
}
|
|
|
|
|
2023-05-03 09:45:22 +00:00
|
|
|
func (ml *ModelLoader) LoadRWKV(modelName, tokenFile string, threads uint32) (*rwkv.RwkvState, error) {
|
|
|
|
ml.mu.Lock()
|
|
|
|
defer ml.mu.Unlock()
|
|
|
|
|
|
|
|
log.Debug().Msgf("Loading model name: %s", modelName)
|
|
|
|
|
|
|
|
// Check if we already have a loaded model
|
|
|
|
if !ml.ExistsInModelPath(modelName) {
|
|
|
|
return nil, fmt.Errorf("model does not exist")
|
|
|
|
}
|
|
|
|
|
|
|
|
if m, ok := ml.rwkv[modelName]; ok {
|
|
|
|
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
|
|
|
return m, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Load the model and keep it in memory for later use
|
|
|
|
modelFile := filepath.Join(ml.ModelPath, modelName)
|
|
|
|
tokenPath := filepath.Join(ml.ModelPath, tokenFile)
|
|
|
|
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
|
|
|
|
|
|
|
model := rwkv.LoadFiles(modelFile, tokenPath, threads)
|
|
|
|
if model == nil {
|
|
|
|
return nil, fmt.Errorf("could not load model")
|
|
|
|
}
|
|
|
|
|
|
|
|
ml.rwkv[modelName] = model
|
|
|
|
return model, nil
|
|
|
|
}
|
|
|
|
|
2023-04-19 15:10:29 +00:00
|
|
|
func (ml *ModelLoader) LoadLLaMAModel(modelName string, opts ...llama.ModelOption) (*llama.LLama, error) {
|
|
|
|
ml.mu.Lock()
|
|
|
|
defer ml.mu.Unlock()
|
|
|
|
|
2023-04-20 16:33:02 +00:00
|
|
|
log.Debug().Msgf("Loading model name: %s", modelName)
|
|
|
|
|
2023-04-19 15:10:29 +00:00
|
|
|
// Check if we already have a loaded model
|
2023-04-20 16:33:02 +00:00
|
|
|
if !ml.ExistsInModelPath(modelName) {
|
|
|
|
return nil, fmt.Errorf("model does not exist")
|
|
|
|
}
|
|
|
|
|
|
|
|
if m, ok := ml.models[modelName]; ok {
|
|
|
|
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
2023-04-19 15:10:29 +00:00
|
|
|
return m, nil
|
|
|
|
}
|
2023-04-20 16:33:02 +00:00
|
|
|
|
2023-04-19 15:10:29 +00:00
|
|
|
// Load the model and keep it in memory for later use
|
2023-04-27 04:18:18 +00:00
|
|
|
modelFile := filepath.Join(ml.ModelPath, modelName)
|
2023-04-20 16:33:02 +00:00
|
|
|
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
|
|
|
|
2023-04-19 15:10:29 +00:00
|
|
|
model, err := llama.New(modelFile, opts...)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// If there is a prompt template, load it
|
2023-04-20 16:33:02 +00:00
|
|
|
if err := ml.loadTemplateIfExists(modelName, modelFile); err != nil {
|
2023-04-19 15:10:29 +00:00
|
|
|
return nil, err
|
2023-04-08 08:46:51 +00:00
|
|
|
}
|
|
|
|
|
2023-04-20 16:33:02 +00:00
|
|
|
ml.models[modelName] = model
|
2023-04-07 09:30:59 +00:00
|
|
|
return model, err
|
|
|
|
}
|
2023-05-05 09:20:06 +00:00
|
|
|
|
2023-05-11 12:05:07 +00:00
|
|
|
func (ml *ModelLoader) LoadWhisperModel(modelName string) (whisper.Model, error) {
|
|
|
|
ml.mu.Lock()
|
|
|
|
defer ml.mu.Unlock()
|
|
|
|
|
|
|
|
// Check if we already have a loaded model
|
|
|
|
if !ml.ExistsInModelPath(modelName) {
|
|
|
|
return nil, fmt.Errorf("model does not exist -- %s", modelName)
|
|
|
|
}
|
|
|
|
|
|
|
|
if m, ok := ml.whisperModels[modelName]; ok {
|
|
|
|
log.Debug().Msgf("Model already loaded in memory: %s", modelName)
|
|
|
|
return m, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Load the model and keep it in memory for later use
|
|
|
|
modelFile := filepath.Join(ml.ModelPath, modelName)
|
|
|
|
log.Debug().Msgf("Loading model in memory from file: %s", modelFile)
|
|
|
|
|
|
|
|
model, err := whisper.New(modelFile)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
ml.whisperModels[modelName] = model
|
|
|
|
return model, err
|
|
|
|
}
|
|
|
|
|
2023-05-05 09:20:06 +00:00
|
|
|
// tokenizerSuffix is appended to a model file name to obtain the name of the
// tokenizer file handed to the RWKV loader.
const tokenizerSuffix = ".tokenizer.json"

// Package-level state used by GreedyLoader.
var (
	// loadedModels remembers, per model file name, the model that
	// GreedyLoader managed to load, so later calls skip backend probing.
	loadedModels = map[string]interface{}{}

	// muModels guards loadedModels.
	muModels sync.Mutex
)
|
|
|
|
|
|
|
|
func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
|
|
|
|
switch strings.ToLower(backendString) {
|
|
|
|
case "llama":
|
|
|
|
return ml.LoadLLaMAModel(modelFile, llamaOpts...)
|
2023-05-10 23:12:58 +00:00
|
|
|
case "bloomz":
|
|
|
|
return ml.LoadBloomz(modelFile)
|
2023-05-05 09:20:06 +00:00
|
|
|
case "stablelm":
|
|
|
|
return ml.LoadStableLMModel(modelFile)
|
2023-05-10 23:12:58 +00:00
|
|
|
case "dolly":
|
|
|
|
return ml.LoadDollyModel(modelFile)
|
|
|
|
case "redpajama":
|
|
|
|
return ml.LoadRedPajama(modelFile)
|
2023-05-05 09:20:06 +00:00
|
|
|
case "gpt2":
|
|
|
|
return ml.LoadGPT2Model(modelFile)
|
2023-05-11 12:31:19 +00:00
|
|
|
case "gpt4all-llama":
|
|
|
|
return ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType))
|
|
|
|
case "gpt4all-mpt":
|
|
|
|
return ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.MPTType))
|
|
|
|
case "gpt4all-j":
|
|
|
|
return ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.GPTJType))
|
2023-05-10 13:20:21 +00:00
|
|
|
case "bert-embeddings":
|
|
|
|
return ml.LoadBERT(modelFile)
|
2023-05-05 09:20:06 +00:00
|
|
|
case "rwkv":
|
|
|
|
return ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
|
|
|
|
default:
|
|
|
|
return nil, fmt.Errorf("backend unsupported: %s", backendString)
|
2023-05-11 12:05:07 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (ml *ModelLoader) WhisperLoader(backendString string, modelFile string) (model whisper.Model, err error) {
|
|
|
|
//TODO expose more whisper options in next PR
|
|
|
|
switch strings.ToLower(backendString) {
|
|
|
|
case "whisper":
|
|
|
|
return ml.LoadWhisperModel(modelFile)
|
|
|
|
default:
|
|
|
|
return nil, fmt.Errorf("whisper backend unsupported: %s", backendString)
|
2023-05-05 09:20:06 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (ml *ModelLoader) GreedyLoader(modelFile string, llamaOpts []llama.ModelOption, threads uint32) (model interface{}, err error) {
|
|
|
|
updateModels := func(model interface{}) {
|
|
|
|
muModels.Lock()
|
|
|
|
defer muModels.Unlock()
|
|
|
|
loadedModels[modelFile] = model
|
|
|
|
}
|
|
|
|
|
|
|
|
muModels.Lock()
|
|
|
|
m, exists := loadedModels[modelFile]
|
|
|
|
if exists {
|
|
|
|
muModels.Unlock()
|
|
|
|
return m, nil
|
|
|
|
}
|
|
|
|
muModels.Unlock()
|
|
|
|
|
|
|
|
model, modelerr := ml.LoadLLaMAModel(modelFile, llamaOpts...)
|
|
|
|
if modelerr == nil {
|
|
|
|
updateModels(model)
|
|
|
|
return model, nil
|
|
|
|
} else {
|
|
|
|
err = multierror.Append(err, modelerr)
|
|
|
|
}
|
|
|
|
|
2023-05-11 12:31:19 +00:00
|
|
|
model, modelerr = ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.GPTJType))
|
2023-05-05 09:20:06 +00:00
|
|
|
if modelerr == nil {
|
|
|
|
updateModels(model)
|
|
|
|
return model, nil
|
|
|
|
} else {
|
|
|
|
err = multierror.Append(err, modelerr)
|
|
|
|
}
|
|
|
|
|
2023-05-11 12:31:19 +00:00
|
|
|
model, modelerr = ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType))
|
2023-05-05 09:20:06 +00:00
|
|
|
if modelerr == nil {
|
|
|
|
updateModels(model)
|
|
|
|
return model, nil
|
|
|
|
} else {
|
|
|
|
err = multierror.Append(err, modelerr)
|
|
|
|
}
|
|
|
|
|
2023-05-11 12:31:19 +00:00
|
|
|
model, modelerr = ml.LoadGPT4AllModel(modelFile, gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.MPTType))
|
2023-05-05 09:20:06 +00:00
|
|
|
if modelerr == nil {
|
|
|
|
updateModels(model)
|
|
|
|
return model, nil
|
|
|
|
} else {
|
|
|
|
err = multierror.Append(err, modelerr)
|
|
|
|
}
|
|
|
|
|
2023-05-11 12:31:19 +00:00
|
|
|
model, modelerr = ml.LoadGPT2Model(modelFile)
|
2023-05-10 23:12:58 +00:00
|
|
|
if modelerr == nil {
|
|
|
|
updateModels(model)
|
|
|
|
return model, nil
|
|
|
|
} else {
|
|
|
|
err = multierror.Append(err, modelerr)
|
|
|
|
}
|
|
|
|
|
2023-05-11 12:31:19 +00:00
|
|
|
model, modelerr = ml.LoadStableLMModel(modelFile)
|
2023-05-10 23:12:58 +00:00
|
|
|
if modelerr == nil {
|
|
|
|
updateModels(model)
|
|
|
|
return model, nil
|
|
|
|
} else {
|
|
|
|
err = multierror.Append(err, modelerr)
|
|
|
|
}
|
|
|
|
|
2023-05-11 12:31:19 +00:00
|
|
|
model, modelerr = ml.LoadDollyModel(modelFile)
|
|
|
|
if modelerr == nil {
|
|
|
|
updateModels(model)
|
|
|
|
return model, nil
|
|
|
|
} else {
|
|
|
|
err = multierror.Append(err, modelerr)
|
|
|
|
}
|
|
|
|
|
|
|
|
model, modelerr = ml.LoadRedPajama(modelFile)
|
2023-05-10 23:12:58 +00:00
|
|
|
if modelerr == nil {
|
|
|
|
updateModels(model)
|
|
|
|
return model, nil
|
|
|
|
} else {
|
|
|
|
err = multierror.Append(err, modelerr)
|
|
|
|
}
|
2023-05-11 12:31:19 +00:00
|
|
|
// Do not autoload bloomz
|
|
|
|
//model, modelerr = ml.LoadBloomz(modelFile)
|
|
|
|
//if modelerr == nil {
|
|
|
|
// updateModels(model)
|
|
|
|
// return model, nil
|
|
|
|
//} else {
|
|
|
|
// err = multierror.Append(err, modelerr)
|
|
|
|
//}
|
2023-05-10 23:12:58 +00:00
|
|
|
|
2023-05-05 09:20:06 +00:00
|
|
|
model, modelerr = ml.LoadRWKV(modelFile, modelFile+tokenizerSuffix, threads)
|
|
|
|
if modelerr == nil {
|
|
|
|
updateModels(model)
|
|
|
|
return model, nil
|
|
|
|
} else {
|
|
|
|
err = multierror.Append(err, modelerr)
|
|
|
|
}
|
|
|
|
|
2023-05-10 13:20:21 +00:00
|
|
|
model, modelerr = ml.LoadBERT(modelFile)
|
|
|
|
if modelerr == nil {
|
|
|
|
updateModels(model)
|
|
|
|
return model, nil
|
|
|
|
} else {
|
|
|
|
err = multierror.Append(err, modelerr)
|
|
|
|
}
|
|
|
|
|
2023-05-05 09:20:06 +00:00
|
|
|
return nil, fmt.Errorf("could not load model - all backends returned error: %s", err.Error())
|
|
|
|
}
|