LocalAI/core/backend/tts.go
Dave eed5706994
refactor: backend/service split, channel-based llm flow (#1963)
Refactor: channel based llm flow and services split

---------

Signed-off-by: Dave Lee <dave@gray101.com>
2024-04-13 09:45:34 +02:00

137 lines
3.8 KiB
Go

package backend
import (
"context"
"fmt"
"os"
"path/filepath"
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/pkg/concurrency"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/go-skynet/LocalAI/pkg/utils"
)
type TextToSpeechBackendService struct {
ml *model.ModelLoader
bcl *config.BackendConfigLoader
appConfig *config.ApplicationConfig
}
func NewTextToSpeechBackendService(ml *model.ModelLoader, bcl *config.BackendConfigLoader, appConfig *config.ApplicationConfig) *TextToSpeechBackendService {
return &TextToSpeechBackendService{
ml: ml,
bcl: bcl,
appConfig: appConfig,
}
}
func (ttsbs *TextToSpeechBackendService) TextToAudioFile(request *schema.TTSRequest) <-chan concurrency.ErrorOr[*string] {
responseChannel := make(chan concurrency.ErrorOr[*string])
go func(request *schema.TTSRequest) {
cfg, err := ttsbs.bcl.LoadBackendConfigFileByName(request.Model, ttsbs.appConfig.ModelPath,
config.LoadOptionDebug(ttsbs.appConfig.Debug),
config.LoadOptionThreads(ttsbs.appConfig.Threads),
config.LoadOptionContextSize(ttsbs.appConfig.ContextSize),
config.LoadOptionF16(ttsbs.appConfig.F16),
)
if err != nil {
responseChannel <- concurrency.ErrorOr[*string]{Error: err}
close(responseChannel)
return
}
if request.Backend != "" {
cfg.Backend = request.Backend
}
outFile, _, err := modelTTS(cfg.Backend, request.Input, cfg.Model, request.Voice, ttsbs.ml, ttsbs.appConfig, cfg)
if err != nil {
responseChannel <- concurrency.ErrorOr[*string]{Error: err}
close(responseChannel)
return
}
responseChannel <- concurrency.ErrorOr[*string]{Value: &outFile}
close(responseChannel)
}(request)
return responseChannel
}
func modelTTS(backend, text, modelFile string, voice string, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig *config.BackendConfig) (string, *proto.Result, error) {
bb := backend
if bb == "" {
bb = model.PiperBackend
}
grpcOpts := gRPCModelOpts(backendConfig)
opts := modelOpts(&config.BackendConfig{}, appConfig, []model.Option{
model.WithBackendString(bb),
model.WithModel(modelFile),
model.WithContext(appConfig.Context),
model.WithAssetDir(appConfig.AssetsDestination),
model.WithLoadGRPCLoadModelOpts(grpcOpts),
})
ttsModel, err := loader.BackendLoader(opts...)
if err != nil {
return "", nil, err
}
if ttsModel == nil {
return "", nil, fmt.Errorf("could not load piper model")
}
if err := os.MkdirAll(appConfig.AudioDir, 0755); err != nil {
return "", nil, fmt.Errorf("failed creating audio directory: %s", err)
}
fileName := generateUniqueFileName(appConfig.AudioDir, "tts", ".wav")
filePath := filepath.Join(appConfig.AudioDir, fileName)
// If the model file is not empty, we pass it joined with the model path
modelPath := ""
if modelFile != "" {
// If the model file is not empty, we pass it joined with the model path
// Checking first that it exists and is not outside ModelPath
// TODO: we should actually first check if the modelFile is looking like
// a FS path
mp := filepath.Join(loader.ModelPath, modelFile)
if _, err := os.Stat(mp); err == nil {
if err := utils.VerifyPath(mp, appConfig.ModelPath); err != nil {
return "", nil, err
}
modelPath = mp
} else {
modelPath = modelFile
}
}
res, err := ttsModel.TTS(context.Background(), &proto.TTSRequest{
Text: text,
Model: modelPath,
Voice: voice,
Dst: filePath,
})
return filePath, res, err
}
func generateUniqueFileName(dir, baseName, ext string) string {
counter := 1
fileName := baseName + ext
for {
filePath := filepath.Join(dir, fileName)
_, err := os.Stat(filePath)
if os.IsNotExist(err) {
return fileName
}
counter++
fileName = fmt.Sprintf("%s_%d%s", baseName, counter, ext)
}
}