Mirror of https://github.com/mudler/LocalAI.git (synced 2024-06-07 19:40:48 +00:00)

Commit cc060a283d (parent 28db83e17b)

fix: drop racy code, refactor and group API schema (#931)

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
@@ -9,6 +9,7 @@ import (
     "github.com/go-skynet/LocalAI/api/localai"
     "github.com/go-skynet/LocalAI/api/openai"
     "github.com/go-skynet/LocalAI/api/options"
+    "github.com/go-skynet/LocalAI/api/schema"
     "github.com/go-skynet/LocalAI/internal"
     "github.com/go-skynet/LocalAI/pkg/assets"

@@ -104,8 +105,8 @@ func App(opts ...options.AppOption) (*fiber.App, error) {

             // Send custom error page
             return ctx.Status(code).JSON(
-                openai.ErrorResponse{
-                    Error: &openai.APIError{Message: err.Error(), Code: code},
+                schema.ErrorResponse{
+                    Error: &schema.APIError{Message: err.Error(), Code: code},
                 },
             )
         },
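With the schema import in place, the API server's error handler builds its payload from the shared types instead of the openai package. From the hunk above, schema.APIError carries at least a Message string and an integer Code, and schema.ErrorResponse wraps a pointer to it. A minimal sketch of how those types could be declared so they serialize to the OpenAI-style error envelope (the JSON tags are an assumption, they are not shown in this diff):

    // Sketch only: the Message/Code fields appear in the hunk above; the JSON
    // tags are assumed so the envelope matches the OpenAI-style error shape.
    package schema

    type APIError struct {
        Message string `json:"message"`
        Code    int    `json:"code"`
    }

    type ErrorResponse struct {
        Error *APIError `json:"error,omitempty"`
    }

Grouping the error types with the rest of the wire format means fiber handlers no longer import the openai package just to report failures.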
@@ -5,14 +5,14 @@ import (
     "fmt"

     config "github.com/go-skynet/LocalAI/api/config"
+    "github.com/go-skynet/LocalAI/api/schema"

     "github.com/go-skynet/LocalAI/api/options"
     "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-    "github.com/go-skynet/LocalAI/pkg/grpc/whisper/api"
     model "github.com/go-skynet/LocalAI/pkg/model"
 )

-func ModelTranscription(audio, language string, loader *model.ModelLoader, c config.Config, o *options.Option) (*api.Result, error) {
+func ModelTranscription(audio, language string, loader *model.ModelLoader, c config.Config, o *options.Option) (*schema.Result, error) {

     opts := modelOpts(c, o, []model.Option{
         model.WithBackendString(model.WhisperBackend),
@@ -10,6 +10,7 @@ import (
     "github.com/go-skynet/LocalAI/api/backend"
     config "github.com/go-skynet/LocalAI/api/config"
     "github.com/go-skynet/LocalAI/api/options"
+    "github.com/go-skynet/LocalAI/api/schema"
     "github.com/go-skynet/LocalAI/pkg/grammar"
     model "github.com/go-skynet/LocalAI/pkg/model"
     "github.com/go-skynet/LocalAI/pkg/utils"

@@ -21,20 +22,20 @@ import (
 func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
     emptyMessage := ""

-    process := func(s string, req *OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan OpenAIResponse) {
-        initialMessage := OpenAIResponse{
+    process := func(s string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
+        initialMessage := schema.OpenAIResponse{
             Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
-            Choices: []Choice{{Delta: &Message{Role: "assistant", Content: &emptyMessage}}},
+            Choices: []schema.Choice{{Delta: &schema.Message{Role: "assistant", Content: &emptyMessage}}},
             Object: "chat.completion.chunk",
         }
         responses <- initialMessage

-        ComputeChoices(req, s, config, o, loader, func(s string, c *[]Choice) {}, func(s string, usage backend.TokenUsage) bool {
-            resp := OpenAIResponse{
+        ComputeChoices(req, s, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
+            resp := schema.OpenAIResponse{
                 Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
-                Choices: []Choice{{Delta: &Message{Content: &s}, Index: 0}},
+                Choices: []schema.Choice{{Delta: &schema.Message{Content: &s}, Index: 0}},
                 Object: "chat.completion.chunk",
-                Usage: OpenAIUsage{
+                Usage: schema.OpenAIUsage{
                     PromptTokens: usage.Prompt,
                     CompletionTokens: usage.Completion,
                     TotalTokens: usage.Prompt + usage.Completion,

@@ -236,13 +237,13 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
     }

     if toStream {
-        responses := make(chan OpenAIResponse)
+        responses := make(chan schema.OpenAIResponse)

         go process(predInput, input, config, o.Loader, responses)

         c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {

-            usage := &OpenAIUsage{}
+            usage := &schema.OpenAIUsage{}

             for ev := range responses {
                 usage = &ev.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it

@@ -259,13 +260,13 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
                 w.Flush()
             }

-            resp := &OpenAIResponse{
+            resp := &schema.OpenAIResponse{
                 Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
-                Choices: []Choice{
+                Choices: []schema.Choice{
                     {
                         FinishReason: "stop",
                         Index: 0,
-                        Delta: &Message{Content: &emptyMessage},
+                        Delta: &schema.Message{Content: &emptyMessage},
                     }},
                 Object: "chat.completion.chunk",
                 Usage: *usage,

@@ -279,7 +280,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
         return nil
     }

-    result, tokenUsage, err := ComputeChoices(input, predInput, config, o, o.Loader, func(s string, c *[]Choice) {
+    result, tokenUsage, err := ComputeChoices(input, predInput, config, o, o.Loader, func(s string, c *[]schema.Choice) {
         if processFunctions {
             // As we have to change the result before processing, we can't stream the answer (yet?)
             ss := map[string]interface{}{}

@@ -313,7 +314,7 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
                 message = backend.Finetune(*config, predInput, message)
                 log.Debug().Msgf("Reply received from LLM(finetuned): %s", message)

-                *c = append(*c, Choice{Message: &Message{Role: "assistant", Content: &message}})
+                *c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &message}})
                 return
             }
         }

@@ -336,28 +337,28 @@ func ChatEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
             }

             fineTunedResponse := backend.Finetune(*config, predInput, prediction.Response)
-            *c = append(*c, Choice{Message: &Message{Role: "assistant", Content: &fineTunedResponse}})
+            *c = append(*c, schema.Choice{Message: &schema.Message{Role: "assistant", Content: &fineTunedResponse}})
         } else {
             // otherwise reply with the function call
-            *c = append(*c, Choice{
+            *c = append(*c, schema.Choice{
                 FinishReason: "function_call",
-                Message: &Message{Role: "assistant", FunctionCall: ss},
+                Message: &schema.Message{Role: "assistant", FunctionCall: ss},
             })
         }

         return
     }
-    *c = append(*c, Choice{FinishReason: "stop", Index: 0, Message: &Message{Role: "assistant", Content: &s}})
+    *c = append(*c, schema.Choice{FinishReason: "stop", Index: 0, Message: &schema.Message{Role: "assistant", Content: &s}})
 }, nil)
 if err != nil {
     return err
 }

-resp := &OpenAIResponse{
+resp := &schema.OpenAIResponse{
     Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
     Choices: result,
     Object: "chat.completion",
-    Usage: OpenAIUsage{
+    Usage: schema.OpenAIUsage{
         PromptTokens: tokenUsage.Prompt,
         CompletionTokens: tokenUsage.Completion,
         TotalTokens: tokenUsage.Prompt + tokenUsage.Completion,
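The streaming path above is a plain producer/consumer pipeline: process runs in its own goroutine and pushes one schema.OpenAIResponse delta per token into a channel, while the fasthttp stream writer drains it, flushing after every chunk and keeping a pointer to the latest Usage so the final stop chunk can report token counts. A compact sketch of that shape (streamChunks and writeChunk are illustrative names, not functions from this repository):

    // Illustrative sketch of the producer/consumer streaming shape used above;
    // writeChunk stands in for the SSE encoding done by the real handler.
    func streamChunks(w *bufio.Writer, produce func(chan<- schema.OpenAIResponse)) {
        responses := make(chan schema.OpenAIResponse)
        go produce(responses) // producer sends one delta chunk per token, then closes the channel
        for ev := range responses {
            writeChunk(w, ev) // hypothetical: serialize ev as one SSE "data:" line
            w.Flush()         // push the chunk to the client immediately
        }
    }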
@@ -10,6 +10,7 @@ import (
     "github.com/go-skynet/LocalAI/api/backend"
     config "github.com/go-skynet/LocalAI/api/config"
     "github.com/go-skynet/LocalAI/api/options"
+    "github.com/go-skynet/LocalAI/api/schema"
     model "github.com/go-skynet/LocalAI/pkg/model"
     "github.com/gofiber/fiber/v2"
     "github.com/rs/zerolog/log"

@@ -18,18 +19,18 @@ import (

 // https://platform.openai.com/docs/api-reference/completions
 func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx) error {
-    process := func(s string, req *OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan OpenAIResponse) {
-        ComputeChoices(req, s, config, o, loader, func(s string, c *[]Choice) {}, func(s string, usage backend.TokenUsage) bool {
-            resp := OpenAIResponse{
+    process := func(s string, req *schema.OpenAIRequest, config *config.Config, loader *model.ModelLoader, responses chan schema.OpenAIResponse) {
+        ComputeChoices(req, s, config, o, loader, func(s string, c *[]schema.Choice) {}, func(s string, usage backend.TokenUsage) bool {
+            resp := schema.OpenAIResponse{
                 Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
-                Choices: []Choice{
+                Choices: []schema.Choice{
                     {
                         Index: 0,
                         Text: s,
                     },
                 },
                 Object: "text_completion",
-                Usage: OpenAIUsage{
+                Usage: schema.OpenAIUsage{
                     PromptTokens: usage.Prompt,
                     CompletionTokens: usage.Completion,
                     TotalTokens: usage.Prompt + usage.Completion,

@@ -90,7 +91,7 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
             log.Debug().Msgf("Template found, input modified to: %s", predInput)
         }

-        responses := make(chan OpenAIResponse)
+        responses := make(chan schema.OpenAIResponse)

         go process(predInput, input, config, o.Loader, responses)

@@ -106,9 +107,9 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
                 w.Flush()
             }

-            resp := &OpenAIResponse{
+            resp := &schema.OpenAIResponse{
                 Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
-                Choices: []Choice{
+                Choices: []schema.Choice{
                     {
                         Index: 0,
                         FinishReason: "stop",

@@ -125,7 +126,7 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
             return nil
         }

-        var result []Choice
+        var result []schema.Choice

         totalTokenUsage := backend.TokenUsage{}

@@ -140,8 +141,9 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
             log.Debug().Msgf("Template found, input modified to: %s", i)
         }

-        r, tokenUsage, err := ComputeChoices(input, i, config, o, o.Loader, func(s string, c *[]Choice) {
-            *c = append(*c, Choice{Text: s, FinishReason: "stop", Index: k})
+        r, tokenUsage, err := ComputeChoices(
+            input, i, config, o, o.Loader, func(s string, c *[]schema.Choice) {
+                *c = append(*c, schema.Choice{Text: s, FinishReason: "stop", Index: k})
         }, nil)
         if err != nil {
             return err

@@ -153,11 +155,11 @@ func CompletionEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
             result = append(result, r...)
         }

-        resp := &OpenAIResponse{
+        resp := &schema.OpenAIResponse{
             Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
             Choices: result,
             Object: "text_completion",
-            Usage: OpenAIUsage{
+            Usage: schema.OpenAIUsage{
                 PromptTokens: totalTokenUsage.Prompt,
                 CompletionTokens: totalTokenUsage.Completion,
                 TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion,
@@ -7,8 +7,10 @@ import (
     "github.com/go-skynet/LocalAI/api/backend"
     config "github.com/go-skynet/LocalAI/api/config"
     "github.com/go-skynet/LocalAI/api/options"
+    "github.com/go-skynet/LocalAI/api/schema"
     model "github.com/go-skynet/LocalAI/pkg/model"
     "github.com/gofiber/fiber/v2"
+
     "github.com/rs/zerolog/log"
 )

@@ -32,7 +34,7 @@ func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
         templateFile = config.TemplateConfig.Edit
     }

-    var result []Choice
+    var result []schema.Choice
     totalTokenUsage := backend.TokenUsage{}

     for _, i := range config.InputStrings {

@@ -47,8 +49,8 @@ func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
         log.Debug().Msgf("Template found, input modified to: %s", i)
     }

-    r, tokenUsage, err := ComputeChoices(input, i, config, o, o.Loader, func(s string, c *[]Choice) {
-        *c = append(*c, Choice{Text: s})
+    r, tokenUsage, err := ComputeChoices(input, i, config, o, o.Loader, func(s string, c *[]schema.Choice) {
+        *c = append(*c, schema.Choice{Text: s})
     }, nil)
     if err != nil {
         return err

@@ -60,11 +62,11 @@ func EditEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx)
         result = append(result, r...)
     }

-    resp := &OpenAIResponse{
+    resp := &schema.OpenAIResponse{
         Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
         Choices: result,
         Object: "edit",
-        Usage: OpenAIUsage{
+        Usage: schema.OpenAIUsage{
             PromptTokens: totalTokenUsage.Prompt,
             CompletionTokens: totalTokenUsage.Completion,
             TotalTokens: totalTokenUsage.Prompt + totalTokenUsage.Completion,
@@ -6,6 +6,8 @@ import (

     "github.com/go-skynet/LocalAI/api/backend"
     config "github.com/go-skynet/LocalAI/api/config"
+    "github.com/go-skynet/LocalAI/api/schema"
+
     "github.com/go-skynet/LocalAI/api/options"
     "github.com/gofiber/fiber/v2"
     "github.com/rs/zerolog/log"

@@ -25,7 +27,7 @@ func EmbeddingsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
     }

     log.Debug().Msgf("Parameter Config: %+v", config)
-    items := []Item{}
+    items := []schema.Item{}

     for i, s := range config.InputToken {
         // get the model function to call for the result

@@ -38,7 +40,7 @@ func EmbeddingsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
         if err != nil {
             return err
         }
-        items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"})
+        items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
     }

     for i, s := range config.InputStrings {

@@ -52,10 +54,10 @@ func EmbeddingsEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fibe
         if err != nil {
             return err
         }
-        items = append(items, Item{Embedding: embeddings, Index: i, Object: "embedding"})
+        items = append(items, schema.Item{Embedding: embeddings, Index: i, Object: "embedding"})
     }

-    resp := &OpenAIResponse{
+    resp := &schema.OpenAIResponse{
         Model: input.Model, // we have to return what the user sent here, due to OpenAI spec.
         Data: items,
         Object: "list",
@@ -5,6 +5,7 @@ import (
     "encoding/base64"
     "encoding/json"
     "fmt"
+    "github.com/go-skynet/LocalAI/api/schema"
     "os"
     "path/filepath"
     "strconv"

@@ -100,7 +101,7 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
         b64JSON = true
     }
     // src and clip_skip
-    var result []Item
+    var result []schema.Item
     for _, i := range config.PromptStrings {
         n := input.N
         if input.N == 0 {

@@ -155,7 +156,7 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
             return err
         }

-        item := &Item{}
+        item := &schema.Item{}

         if b64JSON {
             defer os.RemoveAll(output)

@@ -173,7 +174,7 @@ func ImageEndpoint(cm *config.ConfigLoader, o *options.Option) func(c *fiber.Ctx
         }
     }

-    resp := &OpenAIResponse{
+    resp := &schema.OpenAIResponse{
         Data: result,
     }

@@ -4,12 +4,20 @@ import (
     "github.com/go-skynet/LocalAI/api/backend"
     config "github.com/go-skynet/LocalAI/api/config"
     "github.com/go-skynet/LocalAI/api/options"
+    "github.com/go-skynet/LocalAI/api/schema"
     model "github.com/go-skynet/LocalAI/pkg/model"
 )

-func ComputeChoices(req *OpenAIRequest, predInput string, config *config.Config, o *options.Option, loader *model.ModelLoader, cb func(string, *[]Choice), tokenCallback func(string, backend.TokenUsage) bool) ([]Choice, backend.TokenUsage, error) {
+func ComputeChoices(
+    req *schema.OpenAIRequest,
+    predInput string,
+    config *config.Config,
+    o *options.Option,
+    loader *model.ModelLoader,
+    cb func(string, *[]schema.Choice),
+    tokenCallback func(string, backend.TokenUsage) bool) ([]schema.Choice, backend.TokenUsage, error) {
     n := req.N // number of completions to return
-    result := []Choice{}
+    result := []schema.Choice{}

     if n == 0 {
         n = 1
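ComputeChoices keeps its behavior but now speaks schema types end to end, and the one-line signature is split across lines for readability. The call sites in the endpoints above all follow the same pattern, roughly (variable names illustrative):

    // The call shape used by the endpoints above (variable names illustrative).
    choices, usage, err := ComputeChoices(
        input, prompt, cfg, o, o.Loader,
        func(s string, c *[]schema.Choice) {
            // invoked once per finished completion; collect it
            *c = append(*c, schema.Choice{Text: s, FinishReason: "stop"})
        },
        nil, // tokenCallback: a non-nil func(string, backend.TokenUsage) bool streams tokens
    )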
@@ -4,6 +4,7 @@ import (
     "regexp"

     config "github.com/go-skynet/LocalAI/api/config"
+    "github.com/go-skynet/LocalAI/api/schema"
     model "github.com/go-skynet/LocalAI/pkg/model"
     "github.com/gofiber/fiber/v2"
 )

@@ -16,7 +17,7 @@ func ListModelsEndpoint(loader *model.ModelLoader, cm *config.ConfigLoader) func
     }
     var mm map[string]interface{} = map[string]interface{}{}

-    dataModels := []OpenAIModel{}
+    dataModels := []schema.OpenAIModel{}

     var filterFn func(name string) bool
     filter := c.Query("filter")

@@ -45,7 +46,7 @@ func ListModelsEndpoint(loader *model.ModelLoader, cm *config.ConfigLoader) func
     }

     if filterFn(c.Name) {
-        dataModels = append(dataModels, OpenAIModel{ID: c.Name, Object: "model"})
+        dataModels = append(dataModels, schema.OpenAIModel{ID: c.Name, Object: "model"})
     }
 }

@@ -53,13 +54,13 @@ func ListModelsEndpoint(loader *model.ModelLoader, cm *config.ConfigLoader) func
     for _, m := range models {
         // And only adds them if they shouldn't be skipped.
         if _, exists := mm[m]; !exists && filterFn(m) {
-            dataModels = append(dataModels, OpenAIModel{ID: m, Object: "model"})
+            dataModels = append(dataModels, schema.OpenAIModel{ID: m, Object: "model"})
         }
     }

     return c.JSON(struct {
         Object string `json:"object"`
-        Data []OpenAIModel `json:"data"`
+        Data []schema.OpenAIModel `json:"data"`
     }{
         Object: "list",
         Data: dataModels,
@@ -10,14 +10,15 @@ import (

     config "github.com/go-skynet/LocalAI/api/config"
     options "github.com/go-skynet/LocalAI/api/options"
+    "github.com/go-skynet/LocalAI/api/schema"
     model "github.com/go-skynet/LocalAI/pkg/model"
     "github.com/gofiber/fiber/v2"
     "github.com/rs/zerolog/log"
 )

-func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *OpenAIRequest, error) {
+func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *schema.OpenAIRequest, error) {
     loader := o.Loader
-    input := new(OpenAIRequest)
+    input := new(schema.OpenAIRequest)
     ctx, cancel := context.WithCancel(o.Context)
     input.Context = ctx
     input.Cancel = cancel

@@ -60,7 +61,7 @@ func readInput(c *fiber.Ctx, o *options.Option, randomModel bool) (string, *Open
     return modelFile, input, nil
 }

-func updateConfig(config *config.Config, input *OpenAIRequest) {
+func updateConfig(config *config.Config, input *schema.OpenAIRequest) {
     if input.Echo {
         config.Echo = input.Echo
     }

@@ -218,7 +219,7 @@ func updateConfig(config *config.Config, input *OpenAIRequest) {
     }
 }

-func readConfig(modelFile string, input *OpenAIRequest, cm *config.ConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.Config, *OpenAIRequest, error) {
+func readConfig(modelFile string, input *schema.OpenAIRequest, cm *config.ConfigLoader, loader *model.ModelLoader, debug bool, threads, ctx int, f16 bool) (*config.Config, *schema.OpenAIRequest, error) {
     // Load a config file if present after the model name
     modelConfig := filepath.Join(loader.ModelPath, modelFile+".yaml")

@@ -1,4 +1,4 @@
-package openai
+package schema

 import (
     "context"

@@ -1,4 +1,4 @@
-package api
+package schema

 import "time"

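The two package clauses above are the core of the "group API schema" part of the commit: the request/response types that previously lived in package openai, plus the transcription result type from a package named api, now share the single api/schema package, so every endpoint pulls the wire format from one import:

    import "github.com/go-skynet/LocalAI/api/schema"

    // schema.OpenAIRequest, schema.OpenAIResponse, schema.Choice, schema.Message,
    // schema.OpenAIUsage, schema.Item, schema.OpenAIModel, schema.ErrorResponse
    // and the whisper schema.Result now all resolve through this one import.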
@@ -5,8 +5,8 @@ package main
 import (
     "flag"

+    bert "github.com/go-skynet/LocalAI/pkg/backend/llm/bert"
     grpc "github.com/go-skynet/LocalAI/pkg/grpc"
-    bert "github.com/go-skynet/LocalAI/pkg/grpc/llm/bert"
 )

 var (

@@ -5,7 +5,7 @@ package main
 import (
     "flag"

-    bloomz "github.com/go-skynet/LocalAI/pkg/grpc/llm/bloomz"
+    bloomz "github.com/go-skynet/LocalAI/pkg/backend/llm/bloomz"

     grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )

@@ -5,7 +5,7 @@ package main
 import (
     "flag"

-    transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
+    transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"

     grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )

@@ -5,7 +5,7 @@ package main
 import (
     "flag"

-    transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
+    transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"

     grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )

@@ -7,7 +7,7 @@ package main
 import (
     "flag"

-    falcon "github.com/go-skynet/LocalAI/pkg/grpc/llm/falcon"
+    falcon "github.com/go-skynet/LocalAI/pkg/backend/llm/falcon"

     grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )

@@ -5,7 +5,7 @@ package main
 import (
     "flag"

-    transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
+    transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"

     grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )

@@ -5,7 +5,7 @@ package main
 import (
     "flag"

-    gpt4all "github.com/go-skynet/LocalAI/pkg/grpc/llm/gpt4all"
+    gpt4all "github.com/go-skynet/LocalAI/pkg/backend/llm/gpt4all"

     grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )

@@ -5,7 +5,7 @@ package main
 import (
     "flag"

-    transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
+    transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"

     grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )

@@ -5,7 +5,7 @@ package main
 import (
     "flag"

-    transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
+    transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"

     grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )

@@ -5,7 +5,7 @@ package main
 import (
     "flag"

-    langchain "github.com/go-skynet/LocalAI/pkg/grpc/llm/langchain"
+    langchain "github.com/go-skynet/LocalAI/pkg/backend/llm/langchain"

     grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )

@@ -7,7 +7,7 @@ package main
 import (
     "flag"

-    llama "github.com/go-skynet/LocalAI/pkg/grpc/llm/llama"
+    llama "github.com/go-skynet/LocalAI/pkg/backend/llm/llama"

     grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )

@@ -5,7 +5,7 @@ package main
 import (
     "flag"

-    transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
+    transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"

     grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )

@@ -5,7 +5,7 @@ package main
 import (
     "flag"

-    tts "github.com/go-skynet/LocalAI/pkg/grpc/tts"
+    tts "github.com/go-skynet/LocalAI/pkg/backend/tts"

     grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )

@@ -5,7 +5,7 @@ package main
 import (
     "flag"

-    transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
+    transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"

     grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )

@@ -5,7 +5,7 @@ package main
 import (
     "flag"

-    rwkv "github.com/go-skynet/LocalAI/pkg/grpc/llm/rwkv"
+    rwkv "github.com/go-skynet/LocalAI/pkg/backend/llm/rwkv"

     grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )

@@ -5,7 +5,7 @@ package main
 import (
     "flag"

-    image "github.com/go-skynet/LocalAI/pkg/grpc/image"
+    image "github.com/go-skynet/LocalAI/pkg/backend/image"

     grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )

@@ -5,7 +5,7 @@ package main
 import (
     "flag"

-    transformers "github.com/go-skynet/LocalAI/pkg/grpc/llm/transformers"
+    transformers "github.com/go-skynet/LocalAI/pkg/backend/llm/transformers"

     grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )

@@ -5,7 +5,7 @@ package main
 import (
     "flag"

-    transcribe "github.com/go-skynet/LocalAI/pkg/grpc/transcribe"
+    transcribe "github.com/go-skynet/LocalAI/pkg/backend/transcribe"

     grpc "github.com/go-skynet/LocalAI/pkg/grpc"
 )
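The hunks above update the one-binary-per-backend gRPC servers under the cmd tree: each implementation import moves from pkg/grpc/... to pkg/backend/..., leaving pkg/grpc to the transport code. Assuming these mains follow the pattern common to this tree, one of them would look roughly like this after the move (the addr flag and the grpc.StartServer call are assumptions; the surrounding lines are not shown in this diff):

    // Sketch of one such main after the move; the StartServer call and flag
    // name are assumptions about surrounding code this diff does not show.
    package main

    import (
        "flag"
        "fmt"

        bloomz "github.com/go-skynet/LocalAI/pkg/backend/llm/bloomz"
        grpc "github.com/go-skynet/LocalAI/pkg/grpc"
    )

    var addr = flag.Int("addr", 50051, "port to bind the gRPC backend to") // assumed flag

    func main() {
        flag.Parse()
        if err := grpc.StartServer(fmt.Sprintf("localhost:%d", *addr), &bloomz.LLM{}); err != nil {
            panic(err)
        }
    }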
@@ -9,7 +9,7 @@ import (
 )

 type StableDiffusion struct {
-    base.Base
+    base.SingleThread
     stablediffusion *stablediffusion.StableDiffusion
 }

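This hunk and the ones that follow are the "drop racy code" half of the commit. Until now every backend embedded base.Base, read llm.Base.State without synchronization in Load, and managed llm.Base.Lock()/Unlock() by hand; the streaming methods locked in PredictStream and released the lock from inside the spawned goroutine, so the critical section outlived the call and concurrent Load/Predict/stream interleavings were easy to get wrong. Embedding base.SingleThread instead moves serialization into the base layer. A hypothetical sketch of the shape such a base could take (the real pkg/grpc/base implementation may differ):

    // Hypothetical sketch of a serializing base; the actual base.SingleThread
    // in LocalAI may be implemented differently.
    package base

    import "sync"

    type SingleThread struct {
        Base
        mu sync.Mutex
    }

    // The gRPC server wraps every backend call with Lock/Unlock, so the
    // backend implementations themselves stay completely lock-free.
    func (s *SingleThread) Lock()   { s.mu.Lock() }
    func (s *SingleThread) Unlock() { s.mu.Unlock() }

With the caller holding the lock for the full duration of each request, the unlock-in-goroutine pattern and the unsynchronized state check can simply be deleted from every backend, which is exactly what the hunks below do.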
@@ -4,32 +4,23 @@ package bert
 // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
 import (
     bert "github.com/go-skynet/go-bert.cpp"
-    "github.com/rs/zerolog/log"

     "github.com/go-skynet/LocalAI/pkg/grpc/base"
     pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
 )

 type Embeddings struct {
-    base.Base
+    base.SingleThread
     bert *bert.Bert
 }

 func (llm *Embeddings) Load(opts *pb.ModelOptions) error {
-    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
-        log.Warn().Msgf("bert backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
-    }
-
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     model, err := bert.New(opts.ModelFile)
     llm.bert = model
     return err
 }

 func (llm *Embeddings) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
-    llm.Base.Lock()
-    defer llm.Base.Unlock()

     if len(opts.EmbeddingTokens) > 0 {
         tokens := []int{}
@@ -7,24 +7,17 @@ import (

     "github.com/go-skynet/LocalAI/pkg/grpc/base"
     pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-    "github.com/rs/zerolog/log"

     "github.com/go-skynet/bloomz.cpp"
 )

 type LLM struct {
-    base.Base
+    base.SingleThread

     bloomz *bloomz.Bloomz
 }

 func (llm *LLM) Load(opts *pb.ModelOptions) error {
-    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
-        log.Warn().Msgf("bloomz backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
-    }
-
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     model, err := bloomz.New(opts.ModelFile)
     llm.bloomz = model
     return err

@@ -47,16 +40,11 @@ func buildPredictOptions(opts *pb.PredictOptions) []bloomz.PredictOption {
 }

 func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
-
     return llm.bloomz.Predict(opts.Prompt, buildPredictOptions(opts)...)
 }

 // fallback to Predict
 func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
-    llm.Base.Lock()
-
     go func() {
         res, err := llm.bloomz.Predict(opts.Prompt, buildPredictOptions(opts)...)

@@ -65,7 +53,6 @@ func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) erro
         }
         results <- res
         close(results)
-        llm.Base.Unlock()
     }()

     return nil
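After the cleanup, every PredictStream shares the same lock-free shape: build the options, spawn a goroutine that emits the tokens (or the whole result) on the channel and closes it, and return immediately; serializing concurrent calls is now the embedding base's job, not the backend's:

    // The shared shape of PredictStream after this change (taken from the
    // hunks in this commit; generate stands in for each backend's predict call).
    func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
        go func() {
            res, err := generate(opts) // backend-specific predict call
            if err != nil {
                fmt.Println("err: ", err)
            }
            results <- res
            close(results) // the consumer ranges over results until it is closed
        }()
        return nil
    }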
@@ -7,25 +7,17 @@ import (

     "github.com/go-skynet/LocalAI/pkg/grpc/base"
     pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-    "github.com/rs/zerolog/log"

     ggllm "github.com/mudler/go-ggllm.cpp"
 )

 type LLM struct {
-    base.Base
+    base.SingleThread

     falcon *ggllm.Falcon
 }

 func (llm *LLM) Load(opts *pb.ModelOptions) error {
-    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
-        log.Warn().Msgf("falcon backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
-    }
-
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
-
     ggllmOpts := []ggllm.ModelOption{}
     if opts.ContextSize != 0 {
         ggllmOpts = append(ggllmOpts, ggllm.SetContext(int(opts.ContextSize)))

@@ -126,13 +118,10 @@ func buildPredictOptions(opts *pb.PredictOptions) []ggllm.PredictOption {
 }

 func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     return llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...)
 }

 func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
-    llm.Base.Lock()

     predictOptions := buildPredictOptions(opts)

@@ -150,7 +139,6 @@ func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) erro
             fmt.Println("err: ", err)
         }
         close(results)
-        llm.Base.Unlock()
     }()

     return nil
@@ -8,23 +8,15 @@ import (
     "github.com/go-skynet/LocalAI/pkg/grpc/base"
     pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
     gpt4all "github.com/nomic-ai/gpt4all/gpt4all-bindings/golang"
-    "github.com/rs/zerolog/log"
 )

 type LLM struct {
-    base.Base
+    base.SingleThread

     gpt4all *gpt4all.Model
 }

 func (llm *LLM) Load(opts *pb.ModelOptions) error {
-    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
-        log.Warn().Msgf("gpt4all backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
-    }
-
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
-
     model, err := gpt4all.New(opts.ModelFile,
         gpt4all.SetThreads(int(opts.Threads)),
         gpt4all.SetLibrarySearchPath(opts.LibrarySearchPath))

@@ -47,15 +39,10 @@ func buildPredictOptions(opts *pb.PredictOptions) []gpt4all.PredictOption {
 }

 func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
-
     return llm.gpt4all.Predict(opts.Prompt, buildPredictOptions(opts)...)
 }

 func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
-    llm.Base.Lock()
-
     predictOptions := buildPredictOptions(opts)

     go func() {

@@ -69,7 +56,6 @@ func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) erro
         }
         llm.gpt4all.SetTokenCallback(nil)
         close(results)
-        llm.Base.Unlock()
     }()

     return nil
@@ -8,7 +8,6 @@ import (
     "github.com/go-skynet/LocalAI/pkg/grpc/base"
     pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
     "github.com/go-skynet/LocalAI/pkg/langchain"
-    "github.com/rs/zerolog/log"
 )

 type LLM struct {

@@ -19,21 +18,12 @@ type LLM struct {
 }

 func (llm *LLM) Load(opts *pb.ModelOptions) error {
-    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
-        log.Warn().Msgf("langchain backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
-    }
-
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     llm.langchain, _ = langchain.NewHuggingFace(opts.Model)
     llm.model = opts.Model
     return nil
 }

 func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
-
     o := []langchain.PredictOption{
         langchain.SetModel(llm.model),
         langchain.SetMaxTokens(int(opts.Tokens)),

@@ -48,7 +38,6 @@ func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
 }

 func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
-    llm.Base.Lock()
     o := []langchain.PredictOption{
         langchain.SetModel(llm.model),
         langchain.SetMaxTokens(int(opts.Tokens)),

@@ -63,7 +52,6 @@ func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) erro
         }
         results <- res.Completion
         close(results)
-        llm.Base.Unlock()
     }()

     return nil
@@ -8,24 +8,15 @@ import (
     "github.com/go-skynet/LocalAI/pkg/grpc/base"
     pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
     "github.com/go-skynet/go-llama.cpp"
-    "github.com/rs/zerolog/log"
 )

 type LLM struct {
-    base.Base
+    base.SingleThread

     llama *llama.LLama
 }

 func (llm *LLM) Load(opts *pb.ModelOptions) error {

-    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
-        log.Warn().Msgf("llama backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
-    }
-
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
-
     ropeFreqBase := float32(10000)
     ropeFreqScale := float32(1)

@@ -176,14 +167,10 @@ func buildPredictOptions(opts *pb.PredictOptions) []llama.PredictOption {
 }

 func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     return llm.llama.Predict(opts.Prompt, buildPredictOptions(opts)...)
 }

 func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
-    llm.Base.Lock()

     predictOptions := buildPredictOptions(opts)

     predictOptions = append(predictOptions, llama.SetTokenCallback(func(token string) bool {

@@ -197,16 +184,12 @@ func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) erro
             fmt.Println("err: ", err)
         }
         close(results)
-        llm.Base.Unlock()
     }()

     return nil
 }

 func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
-    llm.Base.Lock()
-    defer llm.Base.Unlock()

     predictOptions := buildPredictOptions(opts)

     if len(opts.EmbeddingTokens) > 0 {

@@ -221,9 +204,6 @@ func (llm *LLM) Embeddings(opts *pb.PredictOptions) ([]float32, error) {
 }

 func (llm *LLM) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationResponse, error) {
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
-
     predictOptions := buildPredictOptions(opts)
     l, tokens, err := llm.llama.TokenizeString(opts.Prompt, predictOptions...)
     if err != nil {
@@ -9,24 +9,17 @@ import (
     "github.com/donomii/go-rwkv.cpp"
     "github.com/go-skynet/LocalAI/pkg/grpc/base"
     pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-    "github.com/rs/zerolog/log"
 )

 const tokenizerSuffix = ".tokenizer.json"

 type LLM struct {
-    base.Base
+    base.SingleThread

     rwkv *rwkv.RwkvState
 }

 func (llm *LLM) Load(opts *pb.ModelOptions) error {
-    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
-        log.Warn().Msgf("rwkv backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
-    }
-
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     modelPath := filepath.Dir(opts.ModelFile)
     modelFile := filepath.Base(opts.ModelFile)
     model := rwkv.LoadFiles(opts.ModelFile, filepath.Join(modelPath, modelFile+tokenizerSuffix), uint32(opts.GetThreads()))

@@ -39,9 +32,6 @@ func (llm *LLM) Load(opts *pb.ModelOptions) error {
 }

 func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
-    llm.Base.Lock()
-    defer llm.Base.Unlock()

     stopWord := "\n"
     if len(opts.StopPrompts) > 0 {
         stopWord = opts.StopPrompts[0]

@@ -57,7 +47,6 @@ func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
 }

 func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) error {
-    llm.Base.Lock()
     go func() {

         stopWord := "\n"

@@ -75,7 +64,6 @@ func (llm *LLM) PredictStream(opts *pb.PredictOptions, results chan string) erro
             return true
         })
         close(results)
-        llm.Base.Unlock()
     }()

     return nil
@@ -7,38 +7,28 @@ import (

     "github.com/go-skynet/LocalAI/pkg/grpc/base"
     pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-    "github.com/rs/zerolog/log"

     transformers "github.com/go-skynet/go-ggml-transformers.cpp"
 )

 type Dolly struct {
-    base.Base
+    base.SingleThread

     dolly *transformers.Dolly
 }

 func (llm *Dolly) Load(opts *pb.ModelOptions) error {
-    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
-        log.Warn().Msgf("dolly backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
-    }
-
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     model, err := transformers.NewDolly(opts.ModelFile)
     llm.dolly = model
     return err
 }

 func (llm *Dolly) Predict(opts *pb.PredictOptions) (string, error) {
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     return llm.dolly.Predict(opts.Prompt, buildPredictOptions(opts)...)
 }

 // fallback to Predict
 func (llm *Dolly) PredictStream(opts *pb.PredictOptions, results chan string) error {
-    llm.Base.Lock()

     go func() {
         res, err := llm.dolly.Predict(opts.Prompt, buildPredictOptions(opts)...)

@@ -48,7 +38,6 @@ func (llm *Dolly) PredictStream(opts *pb.PredictOptions, results chan string) er
         }
         results <- res
         close(results)
-        llm.Base.Unlock()
     }()

     return nil
@@ -7,38 +7,28 @@ import (

     "github.com/go-skynet/LocalAI/pkg/grpc/base"
     pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-    "github.com/rs/zerolog/log"

     transformers "github.com/go-skynet/go-ggml-transformers.cpp"
 )

 type Falcon struct {
-    base.Base
+    base.SingleThread

     falcon *transformers.Falcon
 }

 func (llm *Falcon) Load(opts *pb.ModelOptions) error {
-    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
-        log.Warn().Msgf("transformers-falcon backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
-    }
-
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     model, err := transformers.NewFalcon(opts.ModelFile)
     llm.falcon = model
     return err
 }

 func (llm *Falcon) Predict(opts *pb.PredictOptions) (string, error) {
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     return llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...)
 }

 // fallback to Predict
 func (llm *Falcon) PredictStream(opts *pb.PredictOptions, results chan string) error {
-    llm.Base.Lock()
     go func() {
         res, err := llm.falcon.Predict(opts.Prompt, buildPredictOptions(opts)...)

@@ -47,7 +37,6 @@ func (llm *Falcon) PredictStream(opts *pb.PredictOptions, results chan string) e
         }
         results <- res
         close(results)
-        llm.Base.Unlock()
     }()

     return nil
@@ -7,38 +7,28 @@ import (

     "github.com/go-skynet/LocalAI/pkg/grpc/base"
     pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-    "github.com/rs/zerolog/log"

     transformers "github.com/go-skynet/go-ggml-transformers.cpp"
 )

 type GPT2 struct {
-    base.Base
+    base.SingleThread

     gpt2 *transformers.GPT2
 }

 func (llm *GPT2) Load(opts *pb.ModelOptions) error {
-    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
-        log.Warn().Msgf("gpt2 backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
-    }
-
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     model, err := transformers.New(opts.ModelFile)
     llm.gpt2 = model
     return err
 }

 func (llm *GPT2) Predict(opts *pb.PredictOptions) (string, error) {
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     return llm.gpt2.Predict(opts.Prompt, buildPredictOptions(opts)...)
 }

 // fallback to Predict
 func (llm *GPT2) PredictStream(opts *pb.PredictOptions, results chan string) error {
-    llm.Base.Lock()
     go func() {
         res, err := llm.gpt2.Predict(opts.Prompt, buildPredictOptions(opts)...)

@@ -47,7 +37,6 @@ func (llm *GPT2) PredictStream(opts *pb.PredictOptions, results chan string) err
         }
         results <- res
         close(results)
-        llm.Base.Unlock()
     }()
     return nil
 }
@@ -7,38 +7,28 @@ import (

     "github.com/go-skynet/LocalAI/pkg/grpc/base"
     pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-    "github.com/rs/zerolog/log"

     transformers "github.com/go-skynet/go-ggml-transformers.cpp"
 )

 type GPTJ struct {
-    base.Base
+    base.SingleThread

     gptj *transformers.GPTJ
 }

 func (llm *GPTJ) Load(opts *pb.ModelOptions) error {
-    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
-        log.Warn().Msgf("gptj backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
-    }
-
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     model, err := transformers.NewGPTJ(opts.ModelFile)
     llm.gptj = model
     return err
 }

 func (llm *GPTJ) Predict(opts *pb.PredictOptions) (string, error) {
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     return llm.gptj.Predict(opts.Prompt, buildPredictOptions(opts)...)
 }

 // fallback to Predict
 func (llm *GPTJ) PredictStream(opts *pb.PredictOptions, results chan string) error {
-    llm.Base.Lock()
     go func() {
         res, err := llm.gptj.Predict(opts.Prompt, buildPredictOptions(opts)...)

@@ -47,7 +37,6 @@ func (llm *GPTJ) PredictStream(opts *pb.PredictOptions, results chan string) err
         }
         results <- res
         close(results)
-        llm.Base.Unlock()
     }()
     return nil
 }
@@ -7,38 +7,28 @@ import (

     "github.com/go-skynet/LocalAI/pkg/grpc/base"
     pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-    "github.com/rs/zerolog/log"

     transformers "github.com/go-skynet/go-ggml-transformers.cpp"
 )

 type GPTNeoX struct {
-    base.Base
+    base.SingleThread

     gptneox *transformers.GPTNeoX
 }

 func (llm *GPTNeoX) Load(opts *pb.ModelOptions) error {
-    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
-        log.Warn().Msgf("gptneox backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
-    }
-
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     model, err := transformers.NewGPTNeoX(opts.ModelFile)
     llm.gptneox = model
     return err
 }

 func (llm *GPTNeoX) Predict(opts *pb.PredictOptions) (string, error) {
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     return llm.gptneox.Predict(opts.Prompt, buildPredictOptions(opts)...)
 }

 // fallback to Predict
 func (llm *GPTNeoX) PredictStream(opts *pb.PredictOptions, results chan string) error {
-    llm.Base.Lock()
     go func() {
         res, err := llm.gptneox.Predict(opts.Prompt, buildPredictOptions(opts)...)

@@ -47,7 +37,6 @@ func (llm *GPTNeoX) PredictStream(opts *pb.PredictOptions, results chan string)
         }
         results <- res
         close(results)
-        llm.Base.Unlock()
     }()
     return nil
 }
@@ -7,39 +7,28 @@ import (

     "github.com/go-skynet/LocalAI/pkg/grpc/base"
     pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-    "github.com/rs/zerolog/log"

     transformers "github.com/go-skynet/go-ggml-transformers.cpp"
 )

 type MPT struct {
-    base.Base
+    base.SingleThread

     mpt *transformers.MPT
 }

 func (llm *MPT) Load(opts *pb.ModelOptions) error {
-    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
-        log.Warn().Msgf("mpt backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
-    }
-
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     model, err := transformers.NewMPT(opts.ModelFile)
     llm.mpt = model
     return err
 }

 func (llm *MPT) Predict(opts *pb.PredictOptions) (string, error) {
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
-
     return llm.mpt.Predict(opts.Prompt, buildPredictOptions(opts)...)
 }

 // fallback to Predict
 func (llm *MPT) PredictStream(opts *pb.PredictOptions, results chan string) error {
-    llm.Base.Lock()
     go func() {
         res, err := llm.mpt.Predict(opts.Prompt, buildPredictOptions(opts)...)

@@ -48,7 +37,6 @@ func (llm *MPT) PredictStream(opts *pb.PredictOptions, results chan string) erro
         }
         results <- res
         close(results)
-        llm.Base.Unlock()
     }()
     return nil
 }
@@ -7,38 +7,28 @@ import (

     "github.com/go-skynet/LocalAI/pkg/grpc/base"
     pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-    "github.com/rs/zerolog/log"

     transformers "github.com/go-skynet/go-ggml-transformers.cpp"
 )

 type Replit struct {
-    base.Base
+    base.SingleThread

     replit *transformers.Replit
 }

 func (llm *Replit) Load(opts *pb.ModelOptions) error {
-    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
-        log.Warn().Msgf("replit backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
-    }
-
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     model, err := transformers.NewReplit(opts.ModelFile)
     llm.replit = model
     return err
 }

 func (llm *Replit) Predict(opts *pb.PredictOptions) (string, error) {
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     return llm.replit.Predict(opts.Prompt, buildPredictOptions(opts)...)
 }

 // fallback to Predict
 func (llm *Replit) PredictStream(opts *pb.PredictOptions, results chan string) error {
-    llm.Base.Lock()
     go func() {
         res, err := llm.replit.Predict(opts.Prompt, buildPredictOptions(opts)...)

@@ -47,7 +37,6 @@ func (llm *Replit) PredictStream(opts *pb.PredictOptions, results chan string) e
         }
         results <- res
         close(results)
-        llm.Base.Unlock()
     }()
     return nil
 }
@@ -7,38 +7,28 @@ import (

     "github.com/go-skynet/LocalAI/pkg/grpc/base"
     pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-    "github.com/rs/zerolog/log"

     transformers "github.com/go-skynet/go-ggml-transformers.cpp"
 )

 type Starcoder struct {
-    base.Base
+    base.SingleThread

     starcoder *transformers.Starcoder
 }

 func (llm *Starcoder) Load(opts *pb.ModelOptions) error {
-    if llm.Base.State != pb.StatusResponse_UNINITIALIZED {
-        log.Warn().Msgf("starcoder backend loading %s while already in state %s!", opts.Model, llm.Base.State.String())
-    }
-
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     model, err := transformers.NewStarcoder(opts.ModelFile)
     llm.starcoder = model
     return err
 }

 func (llm *Starcoder) Predict(opts *pb.PredictOptions) (string, error) {
-    llm.Base.Lock()
-    defer llm.Base.Unlock()
     return llm.starcoder.Predict(opts.Prompt, buildPredictOptions(opts)...)
 }

 // fallback to Predict
 func (llm *Starcoder) PredictStream(opts *pb.PredictOptions, results chan string) error {
-    llm.Base.Lock()
     go func() {
         res, err := llm.starcoder.Predict(opts.Prompt, buildPredictOptions(opts)...)

@@ -47,7 +37,6 @@ func (llm *Starcoder) PredictStream(opts *pb.PredictOptions, results chan string
         }
         results <- res
         close(results)
-        llm.Base.Unlock()
     }()

     return nil
@@ -1,4 +1,4 @@
-package whisper
+package transcribe

 import (
     "fmt"
@@ -7,8 +7,8 @@ import (
     "path/filepath"

     "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
-    wav "github.com/go-audio/wav"
-    "github.com/go-skynet/LocalAI/pkg/grpc/whisper/api"
+    "github.com/go-audio/wav"
+    "github.com/go-skynet/LocalAI/api/schema"
 )

 func sh(c string) (string, error) {
@@ -29,8 +29,8 @@ func audioToWav(src, dst string) error {
     return nil
 }

-func Transcript(model whisper.Model, audiopath, language string, threads uint) (api.Result, error) {
-    res := api.Result{}
+func Transcript(model whisper.Model, audiopath, language string, threads uint) (schema.Result, error) {
+    res := schema.Result{}

     dir, err := os.MkdirTemp("", "whisper")
     if err != nil {
@@ -90,7 +90,7 @@ func Transcript(model whisper.Model, audiopath, language string, threads uint) (
             tokens = append(tokens, t.Id)
         }

-        segment := api.Segment{Id: s.Num, Text: s.Text, Start: s.Start, End: s.End, Tokens: tokens}
+        segment := schema.Segment{Id: s.Num, Text: s.Text, Start: s.Start, End: s.End, Tokens: tokens}
         res.Segments = append(res.Segments, segment)

         res.Text += s.Text
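For orientation, the schema.Result and schema.Segment shapes can be reconstructed from how they are populated here and in the gRPC client further down. The sketch below is inferred purely from that usage; the real definitions in api/schema may add JSON tags or fields not visible in this diff:

package schema

import "time"

// Segment and Result as reconstructed from Transcript and
// Client.AudioTranscription in this commit; field order and tags
// are assumptions.
type Segment struct {
	Id     int
	Start  time.Duration
	End    time.Duration
	Text   string
	Tokens []int
}

type Result struct {
	Segments []Segment
	Text     string
}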
@@ -4,14 +4,13 @@ package transcribe
 // It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
 import (
     "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
+    "github.com/go-skynet/LocalAI/api/schema"
     "github.com/go-skynet/LocalAI/pkg/grpc/base"
     pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-    whisperutil "github.com/go-skynet/LocalAI/pkg/grpc/whisper"
-    "github.com/go-skynet/LocalAI/pkg/grpc/whisper/api"
 )

 type Whisper struct {
-    base.Base
+    base.SingleThread
     whisper whisper.Model
 }

@@ -22,6 +21,6 @@ func (sd *Whisper) Load(opts *pb.ModelOptions) error {
     return err
 }

-func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (api.Result, error) {
-    return whisperutil.Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads))
+func (sd *Whisper) AudioTranscription(opts *pb.TranscriptRequest) (schema.Result, error) {
+    return Transcript(sd.whisper, opts.Dst, opts.Language, uint(opts.Threads))
 }
@@ -13,7 +13,7 @@ import (
 )

 type Piper struct {
-    base.Base
+    base.SingleThread
     piper *PiperB
 }

@@ -5,34 +5,32 @@ package base
 import (
     "fmt"
     "os"
-    "sync"

+    "github.com/go-skynet/LocalAI/api/schema"
     pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-    "github.com/go-skynet/LocalAI/pkg/grpc/whisper/api"
     gopsutil "github.com/shirou/gopsutil/v3/process"
 )

+// Base is a base class for all backends to implement
+// Note: the backends that does not support multiple requests
+// should use SingleThread instead
 type Base struct {
-    backendBusy sync.Mutex
-    State       pb.StatusResponse_State
 }

-func (llm *Base) Busy() bool {
-    r := llm.backendBusy.TryLock()
-    if r {
-        llm.backendBusy.Unlock()
-    }
-    return r
+func (llm *Base) Locking() bool {
+    return false
 }

 func (llm *Base) Lock() {
-    llm.backendBusy.Lock()
-    llm.State = pb.StatusResponse_BUSY
+    panic("not implemented")
 }

 func (llm *Base) Unlock() {
-    llm.State = pb.StatusResponse_READY
-    llm.backendBusy.Unlock()
+    panic("not implemented")
+}
+
+func (llm *Base) Busy() bool {
+    return false
 }

 func (llm *Base) Load(opts *pb.ModelOptions) error {
@@ -55,8 +53,8 @@ func (llm *Base) GenerateImage(*pb.GenerateImageRequest) error {
     return fmt.Errorf("unimplemented")
 }

-func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (api.Result, error) {
-    return api.Result{}, fmt.Errorf("unimplemented")
+func (llm *Base) AudioTranscription(*pb.TranscriptRequest) (schema.Result, error) {
+    return schema.Result{}, fmt.Errorf("unimplemented")
 }

 func (llm *Base) TTS(*pb.TTSRequest) error {
@@ -69,7 +67,12 @@ func (llm *Base) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationRespons

 // backends may wish to call this to capture the gopsutil info, then enhance with additional memory usage details?
 func (llm *Base) Status() (pb.StatusResponse, error) {
+    return pb.StatusResponse{
+        Memory: memoryUsage(),
+    }, nil
+}
+
+func memoryUsage() *pb.MemoryUsageData {
     mud := pb.MemoryUsageData{
         Breakdown: make(map[string]uint64),
     }
@@ -85,9 +88,5 @@ func (llm *Base) Status() (pb.StatusResponse, error) {
             mud.Breakdown["gopsutil-RSS"] = memInfo.RSS
         }
     }
-    return pb.StatusResponse{
-        State:  llm.State,
-        Memory: &mud,
-    }, nil
+    return &mud
 }
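With this change base.Base becomes the explicit opt-out: Locking() reports false, so the gRPC server never brackets calls with Lock/Unlock, and the panic("not implemented") bodies only trip if a caller breaks that contract. A hypothetical backend that is safe under concurrent requests would now look like this (EchoBackend is illustrative only, not a backend in this repository):

package example

import (
	"github.com/go-skynet/LocalAI/pkg/grpc/base"
	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
)

// EchoBackend is a hypothetical backend that can serve concurrent
// requests. Embedding base.Base means Locking() returns false, so the
// gRPC server calls Predict from many goroutines with no locking.
type EchoBackend struct {
	base.Base
}

func (e *EchoBackend) Predict(opts *pb.PredictOptions) (string, error) {
	return opts.Prompt, nil
}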
pkg/grpc/base/singlethread.go (new file, 52 lines)
@@ -0,0 +1,52 @@
+package base
+
+import (
+    "sync"
+
+    pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
+)
+
+// SingleThread are backends that does not support multiple requests.
+// There will be only one request being served at the time.
+// This is useful for models that are not thread safe and cannot run
+// multiple requests at the same time.
+type SingleThread struct {
+    Base
+    backendBusy sync.Mutex
+}
+
+// Locking returns true if the backend needs to lock resources
+func (llm *SingleThread) Locking() bool {
+    return true
+}
+
+func (llm *SingleThread) Lock() {
+    llm.backendBusy.Lock()
+}
+
+func (llm *SingleThread) Unlock() {
+    llm.backendBusy.Unlock()
+}
+
+func (llm *SingleThread) Busy() bool {
+    r := llm.backendBusy.TryLock()
+    if r {
+        llm.backendBusy.Unlock()
+    }
+    return r
+}
+
+// backends may wish to call this to capture the gopsutil info, then enhance with additional memory usage details?
+func (llm *SingleThread) Status() (pb.StatusResponse, error) {
+    mud := memoryUsage()
+
+    state := pb.StatusResponse_READY
+    if llm.Busy() {
+        state = pb.StatusResponse_BUSY
+    }
+
+    return pb.StatusResponse{
+        State:  state,
+        Memory: mud,
+    }, nil
+}
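Usage is the mirror image of the Base sketch above: a backend wrapping a non-thread-safe native library embeds base.SingleThread and inherits Locking() == true, the mutex, and a BUSY/READY status derived from the TryLock probe in Busy(). A hypothetical example (SerialBackend is illustrative, not from the repository):

package example

import (
	"github.com/go-skynet/LocalAI/pkg/grpc/base"
	pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
)

// SerialBackend is a hypothetical backend around a library that cannot
// handle concurrent calls. Embedding base.SingleThread makes Locking()
// return true, so the gRPC server brackets every RPC with Lock/Unlock
// and Status() reports BUSY while a request is in flight.
type SerialBackend struct {
	base.SingleThread
}

func (s *SerialBackend) Predict(opts *pb.PredictOptions) (string, error) {
	// The server already holds the lock here; no locking needed inside.
	return "ok: " + opts.Prompt, nil
}

Note the Busy() probe: TryLock either fails (someone holds the lock, so the backend is busy) or succeeds and immediately unlocks, which lets Status() report BUSY without ever blocking.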
@@ -7,8 +7,8 @@ import (
     "sync"
     "time"

+    "github.com/go-skynet/LocalAI/api/schema"
     pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-    "github.com/go-skynet/LocalAI/pkg/grpc/whisper/api"
     "google.golang.org/grpc"
     "google.golang.org/grpc/credentials/insecure"
 )
@@ -158,7 +158,7 @@ func (c *Client) TTS(ctx context.Context, in *pb.TTSRequest, opts ...grpc.CallOp
     return client.TTS(ctx, in, opts...)
 }

-func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*api.Result, error) {
+func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*schema.Result, error) {
     c.setBusy(true)
     defer c.setBusy(false)
     conn, err := grpc.Dial(c.address, grpc.WithTransportCredentials(insecure.NewCredentials()))
@@ -171,14 +171,14 @@ func (c *Client) AudioTranscription(ctx context.Context, in *pb.TranscriptReques
     if err != nil {
         return nil, err
     }
-    tresult := &api.Result{}
+    tresult := &schema.Result{}
     for _, s := range res.Segments {
         tks := []int{}
         for _, t := range s.Tokens {
             tks = append(tks, int(t))
         }
         tresult.Segments = append(tresult.Segments,
-            api.Segment{
+            schema.Segment{
                 Text:  s.Text,
                 Id:    int(s.Id),
                 Start: time.Duration(s.Start),
@@ -1,18 +1,21 @@
 package grpc

 import (
+    "github.com/go-skynet/LocalAI/api/schema"
     pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
-    "github.com/go-skynet/LocalAI/pkg/grpc/whisper/api"
 )

 type LLM interface {
     Busy() bool
+    Lock()
+    Unlock()
+    Locking() bool
     Predict(*pb.PredictOptions) (string, error)
     PredictStream(*pb.PredictOptions, chan string) error
     Load(*pb.ModelOptions) error
     Embeddings(*pb.PredictOptions) ([]float32, error)
     GenerateImage(*pb.GenerateImageRequest) error
-    AudioTranscription(*pb.TranscriptRequest) (api.Result, error)
+    AudioTranscription(*pb.TranscriptRequest) (schema.Result, error)
     TTS(*pb.TTSRequest) error
     TokenizeString(*pb.PredictOptions) (pb.TokenizationResponse, error)
     Status() (pb.StatusResponse, error)
@@ -30,6 +30,10 @@ func (s *server) Health(ctx context.Context, in *pb.HealthMessage) (*pb.Reply, e
 }

 func (s *server) Embedding(ctx context.Context, in *pb.PredictOptions) (*pb.EmbeddingResult, error) {
+    if s.llm.Locking() {
+        s.llm.Lock()
+        defer s.llm.Unlock()
+    }
     embeds, err := s.llm.Embeddings(in)
     if err != nil {
         return nil, err
@@ -39,6 +43,10 @@ func (s *server) Embedding(ctx context.Context, in *pb.PredictOptions) (*pb.Embe
 }

 func (s *server) LoadModel(ctx context.Context, in *pb.ModelOptions) (*pb.Result, error) {
+    if s.llm.Locking() {
+        s.llm.Lock()
+        defer s.llm.Unlock()
+    }
     err := s.llm.Load(in)
     if err != nil {
         return &pb.Result{Message: fmt.Sprintf("Error loading model: %s", err.Error()), Success: false}, err
@@ -47,11 +55,19 @@ func (s *server) LoadModel(ctx context.Context, in *pb.ModelOptions) (*pb.Result
 }

 func (s *server) Predict(ctx context.Context, in *pb.PredictOptions) (*pb.Reply, error) {
+    if s.llm.Locking() {
+        s.llm.Lock()
+        defer s.llm.Unlock()
+    }
     result, err := s.llm.Predict(in)
     return newReply(result), err
 }

 func (s *server) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest) (*pb.Result, error) {
+    if s.llm.Locking() {
+        s.llm.Lock()
+        defer s.llm.Unlock()
+    }
     err := s.llm.GenerateImage(in)
     if err != nil {
         return &pb.Result{Message: fmt.Sprintf("Error generating image: %s", err.Error()), Success: false}, err
@@ -60,6 +76,10 @@ func (s *server) GenerateImage(ctx context.Context, in *pb.GenerateImageRequest)
 }

 func (s *server) TTS(ctx context.Context, in *pb.TTSRequest) (*pb.Result, error) {
+    if s.llm.Locking() {
+        s.llm.Lock()
+        defer s.llm.Unlock()
+    }
     err := s.llm.TTS(in)
     if err != nil {
         return &pb.Result{Message: fmt.Sprintf("Error generating audio: %s", err.Error()), Success: false}, err
@@ -68,6 +88,10 @@ func (s *server) TTS(ctx context.Context, in *pb.TTSRequest) (*pb.Result, error)
 }

 func (s *server) AudioTranscription(ctx context.Context, in *pb.TranscriptRequest) (*pb.TranscriptResult, error) {
+    if s.llm.Locking() {
+        s.llm.Lock()
+        defer s.llm.Unlock()
+    }
     result, err := s.llm.AudioTranscription(in)
     if err != nil {
         return nil, err
@@ -93,7 +117,10 @@ func (s *server) AudioTranscription(ctx context.Context, in *pb.TranscriptReques
 }

 func (s *server) PredictStream(in *pb.PredictOptions, stream pb.Backend_PredictStreamServer) error {
+    if s.llm.Locking() {
+        s.llm.Lock()
+        defer s.llm.Unlock()
+    }
     resultChan := make(chan string)

     done := make(chan bool)
@@ -111,6 +138,10 @@ func (s *server) PredictStream(in *pb.PredictOptions, stream pb.Backend_PredictS
 }

 func (s *server) TokenizeString(ctx context.Context, in *pb.PredictOptions) (*pb.TokenizationResponse, error) {
+    if s.llm.Locking() {
+        s.llm.Lock()
+        defer s.llm.Unlock()
+    }
     res, err := s.llm.TokenizeString(in)
     if err != nil {
         return nil, err
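The guard added to every RPC above is the same four lines of boilerplate; its defer guarantees the unlock runs on every return path, including panics, which is exactly what the deleted per-backend Lock/Unlock code could not promise. If the repetition ever needs factoring out, a helper along these lines would do (a sketch, not code from this PR):

package example

// Locker is the subset of the LLM interface the guard needs.
type Locker interface {
	Locking() bool
	Lock()
	Unlock()
}

// withBackendLock factors out the guard repeated in each RPC above:
// lock only when the backend asks for it, and rely on defer so the
// lock is released on every return path, including panics.
func withBackendLock(l Locker, fn func() error) error {
	if l.Locking() {
		l.Lock()
		defer l.Unlock()
	}
	return fn()
}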