LocalAI/pkg/whisper/whisper.go

114 lines
2.2 KiB
Go
Raw Normal View History

2023-05-09 09:43:50 +00:00
package whisper
import (
	"errors"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"time"

	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
	wav "github.com/go-audio/wav"
)
// Segment is a single transcribed span of audio.
//
// Start and End are time.Duration values, so encoding/json emits them as
// integer nanosecond counts. Tokens holds the integer ids of the tokens that
// make up Text.
type Segment struct {
	Id     int           `json:"id"`
	Start  time.Duration `json:"start"`
	End    time.Duration `json:"end"`
	Text   string        `json:"text"`
	Tokens []int         `json:"tokens"`
}
// Result is the outcome of a transcription: the individual segments in the
// order they were produced, plus Text, the concatenation of every segment's
// text.
type Result struct {
	Segments []Segment `json:"segments"`
	Text     string    `json:"text"`
}
2023-05-09 09:43:50 +00:00
// sh executes c with "/bin/sh -c", passing the current process environment
// to the child. It returns the child's combined stdout and stderr as a
// string together with the error, if any, reported for the run.
func sh(c string) (string, error) {
	shell := exec.Command("/bin/sh", "-c", c)
	shell.Env = os.Environ()

	output, runErr := shell.CombinedOutput()
	return string(output), runErr
}
// audioToWav converts the audio file at src into a WAV file at dst suitable
// for transcription: 16 kHz sample rate, one channel, signed 16-bit
// little-endian PCM. The conversion is delegated to the ffmpeg executable,
// which must be resolvable through PATH.
//
// ffmpeg is started directly with an argument vector rather than through a
// shell, so src and dst are handed over verbatim: paths containing spaces or
// shell metacharacters are neither split nor interpreted.
//
// On failure the returned error wraps the execution error and includes
// everything ffmpeg wrote to stdout and stderr.
//
// TODO: use https://github.com/mccoyst/ogg?
func audioToWav(src, dst string) error {
	cmd := exec.Command("ffmpeg",
		"-i", src,
		"-format", "s16le",
		"-ar", "16000",
		"-ac", "1",
		"-acodec", "pcm_s16le",
		dst,
	)
	out, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("error: %w out: %s", err, out)
	}
	return nil
}
func Transcript(model whisper.Model, audiopath, language string, threads uint) (Result, error) {
res := Result{}
2023-05-09 09:43:50 +00:00
dir, err := os.MkdirTemp("", "whisper")
if err != nil {
return res, err
2023-05-09 09:43:50 +00:00
}
defer os.RemoveAll(dir)
convertedPath := filepath.Join(dir, "converted.wav")
if err := audioToWav(audiopath, convertedPath); err != nil {
return res, err
2023-05-09 09:43:50 +00:00
}
// Open samples
fh, err := os.Open(convertedPath)
if err != nil {
return res, err
2023-05-09 09:43:50 +00:00
}
defer fh.Close()
// Read samples
d := wav.NewDecoder(fh)
buf, err := d.FullPCMBuffer()
if err != nil {
return res, err
2023-05-09 09:43:50 +00:00
}
data := buf.AsFloat32Buffer().Data
// Process samples
context, err := model.NewContext()
if err != nil {
return res, err
2023-05-09 09:43:50 +00:00
}
context.SetThreads(threads)
2023-05-09 09:43:50 +00:00
if language != "" {
context.SetLanguage(language)
} else {
context.SetLanguage("auto")
2023-05-09 09:43:50 +00:00
}
if err := context.Process(data, nil, nil); err != nil {
return res, err
2023-05-09 09:43:50 +00:00
}
for {
s, err := context.NextSegment()
2023-05-09 09:43:50 +00:00
if err != nil {
break
}
var tokens []int
for _, t := range(s.Tokens) {
tokens = append(tokens, t.Id)
}
segment := Segment{Id: s.Num, Text: s.Text, Start:s.Start, End: s.End, Tokens: tokens}
res.Segments = append(res.Segments, segment)
res.Text += s.Text
2023-05-09 09:43:50 +00:00
}
return res, nil
2023-05-09 09:43:50 +00:00
}