Add a mutex per model (parallel inference on the same model isn't supported yet)

mudler 2023-04-08 11:45:36 +02:00
parent ba70363330
commit b710147b95

api.go

@@ -49,10 +49,14 @@ func api(defaultModel *llama.LLama, loader *ModelLoader, listenAddr string, thre
NotFoundFile: "index.html",
}))
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
var mutex = &sync.Mutex{}
mu := map[string]*sync.Mutex{}
var mumutex = &sync.Mutex{}
// openAI compatible API endpoint
app.Post("/v1/chat/completions", func(c *fiber.Ctx) error {
var err error
var model *llama.LLama
@@ -77,6 +81,23 @@ func api(defaultModel *llama.LLama, loader *ModelLoader, listenAddr string, thre
}
}
// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
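// Serialize requests per model: look up (or lazily create) the model's mutex under mumutex, then hold it for the whole request.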
if input.Model != "" {
mumutex.Lock()
l, ok := mu[input.Model]
if !ok {
m := &sync.Mutex{}
mu[input.Model] = m
l = m
}
mumutex.Unlock()
l.Lock()
defer l.Unlock()
} else {
mutex.Lock()
defer mutex.Unlock()
}
// Set the parameters for the language model prediction
topP, err := strconv.ParseFloat(c.Query("topP", "0.9"), 64) // Default value of topP is 0.9
if err != nil {
@@ -105,6 +126,7 @@ func api(defaultModel *llama.LLama, loader *ModelLoader, listenAddr string, thre
predInput := strings.Join(mess, "\n")
// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
templatedInput, err := loader.TemplatePrefix(input.Model, struct {
Input string
}{Input: predInput})
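
The locking pattern the new hunk introduces can be lifted out into a minimal, self-contained sketch. The names below (modelLocker, newModelLocker, Lock) are illustrative, not identifiers from api.go; the three fields correspond to the commit's mu map, mumutex, and fallback mutex variables.

package main

import (
	"fmt"
	"sync"
)

// modelLocker sketches the commit's scheme: a map of per-model mutexes,
// itself guarded by a mutex, so requests for the same model are serialized
// while requests for different models don't block each other. A fallback
// mutex covers requests that don't name a model.
type modelLocker struct {
	mu       sync.Mutex             // guards the locks map (mumutex in the commit)
	locks    map[string]*sync.Mutex // one mutex per model name (mu in the commit)
	fallback sync.Mutex             // used when no model is specified (mutex in the commit)
}

func newModelLocker() *modelLocker {
	return &modelLocker{locks: map[string]*sync.Mutex{}}
}

// Lock acquires the mutex for the given model, creating it on first use,
// and returns the matching unlock. Callers defer the result, mirroring the
// l.Lock(); defer l.Unlock() pair in the handler above.
func (ml *modelLocker) Lock(model string) (unlock func()) {
	if model == "" {
		ml.fallback.Lock()
		return ml.fallback.Unlock
	}
	ml.mu.Lock()
	l, ok := ml.locks[model]
	if !ok {
		l = &sync.Mutex{}
		ml.locks[model] = l
	}
	ml.mu.Unlock()
	l.Lock()
	return l.Unlock
}

func main() {
	ml := newModelLocker()
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			unlock := ml.Lock("ggml-model.bin")
			defer unlock()
			// Only one goroutine at a time reaches this point for a given
			// model, emulating one llama.cpp inference at a time.
			fmt.Println("request", i, "running inference")
		}(i)
	}
	wg.Wait()
}

Note the design choice: the map lookup happens under its own short-lived lock, and the per-model mutex is taken only after that lock is released, so a long-running inference on one model never prevents requests for other models from acquiring their own mutexes.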