2024-01-05 17:04:46 +00:00
|
|
|
package openai
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"net/http"
|
|
|
|
"os"
|
|
|
|
"path"
|
|
|
|
"path/filepath"
|
|
|
|
|
2024-02-21 01:21:19 +00:00
|
|
|
"github.com/go-skynet/LocalAI/core/backend"
|
2024-03-01 15:19:53 +00:00
|
|
|
"github.com/go-skynet/LocalAI/core/config"
|
|
|
|
model "github.com/go-skynet/LocalAI/pkg/model"
|
2024-01-05 17:04:46 +00:00
|
|
|
|
|
|
|
"github.com/gofiber/fiber/v2"
|
|
|
|
"github.com/rs/zerolog/log"
|
|
|
|
)
|
|
|
|
|
2024-03-29 21:29:33 +00:00
|
|
|
// TranscriptEndpoint is the OpenAI Whisper API endpoint https://platform.openai.com/docs/api-reference/audio/create
|
|
|
|
// @Summary Transcribes audio into the input language.
|
|
|
|
// @accept multipart/form-data
|
|
|
|
// @Param model formData string true "model"
|
|
|
|
// @Param file formData file true "file"
|
|
|
|
// @Success 200 {object} map[string]string "Response"
|
|
|
|
// @Router /v1/audio/transcriptions [post]
|
2024-03-01 15:19:53 +00:00
|
|
|
func TranscriptEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
|
2024-01-05 17:04:46 +00:00
|
|
|
return func(c *fiber.Ctx) error {
|
2024-03-01 15:19:53 +00:00
|
|
|
m, input, err := readRequest(c, ml, appConfig, false)
|
2024-01-05 17:04:46 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
|
|
|
}
|
|
|
|
|
2024-03-01 15:19:53 +00:00
|
|
|
config, input, err := mergeRequestWithConfig(m, input, cl, ml, appConfig.Debug, appConfig.Threads, appConfig.ContextSize, appConfig.F16)
|
2024-01-05 17:04:46 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed reading parameters from request:%w", err)
|
|
|
|
}
|
|
|
|
// retrieve the file data from the request
|
|
|
|
file, err := c.FormFile("file")
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
f, err := file.Open()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer f.Close()
|
|
|
|
|
|
|
|
dir, err := os.MkdirTemp("", "whisper")
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer os.RemoveAll(dir)
|
|
|
|
|
|
|
|
dst := filepath.Join(dir, path.Base(file.Filename))
|
|
|
|
dstFile, err := os.Create(dst)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if _, err := io.Copy(dstFile, f); err != nil {
|
|
|
|
log.Debug().Msgf("Audio file copying error %+v - %+v - err %+v", file.Filename, dst, err)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Debug().Msgf("Audio file copied to: %+v", dst)
|
|
|
|
|
2024-03-01 15:19:53 +00:00
|
|
|
tr, err := backend.ModelTranscription(dst, input.Language, ml, *config, appConfig)
|
2024-01-05 17:04:46 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Debug().Msgf("Trascribed: %+v", tr)
|
|
|
|
// TODO: handle different outputs here
|
|
|
|
return c.Status(http.StatusOK).JSON(tr)
|
|
|
|
}
|
|
|
|
}
|