Mirror of https://github.com/mudler/LocalAI.git (synced 2024-06-07 19:40:48 +00:00)
Add mutex on same models (parallel isn't supported yet)
This commit is contained in:
parent: ba70363330
commit: b710147b95
Changed file: api.go (22 changed lines)
@@ -49,10 +49,14 @@ func api(defaultModel *llama.LLama, loader *ModelLoader, listenAddr string, thre
		NotFoundFile: "index.html",
	}))

	// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
	var mutex = &sync.Mutex{}
	mu := map[string]*sync.Mutex{}
	var mumutex = &sync.Mutex{}

	// openAI compatible API endpoint
	app.Post("/v1/chat/completions", func(c *fiber.Ctx) error {

		var err error
		var model *llama.LLama

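The hunk above declares the locking state and registers the OpenAI-compatible endpoint on Fiber. For orientation, here is a minimal, self-contained sketch of registering such a handler and parsing a float query parameter with a default; the route and parameter names mirror the diff, everything else is illustrative and not LocalAI code:

package main

import (
	"strconv"

	"github.com/gofiber/fiber/v2"
)

func main() {
	app := fiber.New()

	// Hypothetical handler mirroring the endpoint registered in the diff.
	app.Post("/v1/chat/completions", func(c *fiber.Ctx) error {
		// Query parameter with a default, as in the diff (topP defaults to 0.9).
		topP, err := strconv.ParseFloat(c.Query("topP", "0.9"), 64)
		if err != nil {
			return c.Status(fiber.StatusBadRequest).SendString("invalid topP")
		}
		return c.JSON(fiber.Map{"topP": topP})
	})

	_ = app.Listen(":8080")
}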
@@ -77,6 +81,23 @@ func api(defaultModel *llama.LLama, loader *ModelLoader, listenAddr string, thre
			}
		}

		// This is still needed, see: https://github.com/ggerganov/llama.cpp/discussions/784
		if input.Model != "" {
			mumutex.Lock()
			l, ok := mu[input.Model]
			if !ok {
				m := &sync.Mutex{}
				mu[input.Model] = m
				l = m
			}
			mumutex.Unlock()
			l.Lock()
			defer l.Unlock()
		} else {
			mutex.Lock()
			defer mutex.Unlock()
		}

		// Set the parameters for the language model prediction
		topP, err := strconv.ParseFloat(c.Query("topP", "0.9"), 64) // Default value of topP is 0.9
		if err != nil {
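The core of this commit is the per-model lock in the hunk above: an outer mutex (mumutex) guards a map of per-model mutexes (mu), so two requests for the same model are serialized while requests for different models do not block each other. A minimal standalone sketch of the same pattern follows; the helper names (lockForModel, withModelLock) are illustrative and not part of LocalAI:

package main

import (
	"fmt"
	"sync"
)

var (
	mu      = map[string]*sync.Mutex{} // one mutex per model name
	mumutex = &sync.Mutex{}            // guards the map itself
)

// lockForModel returns the mutex for a model name, creating it on first use.
func lockForModel(model string) *sync.Mutex {
	mumutex.Lock()
	defer mumutex.Unlock()
	l, ok := mu[model]
	if !ok {
		l = &sync.Mutex{}
		mu[model] = l
	}
	return l
}

// withModelLock runs fn while holding the lock for the given model.
func withModelLock(model string, fn func()) {
	l := lockForModel(model)
	l.Lock()
	defer l.Unlock()
	fn()
}

func main() {
	var wg sync.WaitGroup
	for _, m := range []string{"model-a", "model-a", "model-b"} {
		wg.Add(1)
		go func(model string) {
			defer wg.Done()
			withModelLock(model, func() {
				fmt.Println("predicting with", model)
			})
		}(m)
	}
	wg.Wait()
}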
@@ -105,6 +126,7 @@ func api(defaultModel *llama.LLama, loader *ModelLoader, listenAddr string, thre

		predInput := strings.Join(mess, "\n")

		// A model can have a "file.bin.tmpl" file associated with a prompt template prefix
		templatedInput, err := loader.TemplatePrefix(input.Model, struct {
			Input string
		}{Input: predInput})
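The last hunk passes the joined prompt through loader.TemplatePrefix, which resolves an optional "file.bin.tmpl" prompt template for the model. As a rough sketch of how such a prefix could be rendered with Go's text/template, assuming a "<model>.tmpl" file next to the model; this is an illustration only and not LocalAI's actual implementation:

package main

import (
	"bytes"
	"fmt"
	"os"
	"path/filepath"
	"text/template"
)

// templatePrefix is a hypothetical helper: it looks for "<model>.tmpl" in
// modelDir and renders it with the input exposed as .Input, falling back to
// the raw input when no template file exists.
func templatePrefix(modelDir, model string, data struct{ Input string }) (string, error) {
	raw, err := os.ReadFile(filepath.Join(modelDir, model+".tmpl"))
	if err != nil {
		// No template file: use the input as-is.
		return data.Input, nil
	}

	tmpl, err := template.New(model).Parse(string(raw))
	if err != nil {
		return "", err
	}

	var buf bytes.Buffer
	if err := tmpl.Execute(&buf, data); err != nil {
		return "", err
	}
	return buf.String(), nil
}

func main() {
	out, err := templatePrefix("./models", "file.bin", struct{ Input string }{Input: "Hello"})
	if err != nil {
		panic(err)
	}
	fmt.Println(out)
}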