package main

import (
	"bytes"
	"fmt"
	"os/exec"
	"strconv"
	"time"

	"git.annabunches.net/annabunches/joyful/internal/logger"
	"github.com/ebitengine/oto/v3"
	flag "github.com/spf13/pflag"
)

// TTSOptions holds the user-configurable text-to-speech settings. Apart from
// Disabled, each field is passed verbatim to the matching espeak-ng option in
// (*TTS).AddMessage.
type TTSOptions struct {
	Disabled bool   // when true, newTTS returns a nil *TTS
	Voice    string // espeak-ng -v
	Volume   int    // espeak-ng -a
	Pitch    int    // espeak-ng -p
	Range    int    // espeak-ng -P
	Speed    int    // espeak-ng -s
}

// TTS renders phrases with espeak-ng ahead of time and plays them back
// through an oto audio context.
type TTS struct {
	options *TTSOptions
	otoCtx  *oto.Context
	phrases map[string][]byte // espeak-ng output, keyed by the exact message text
}

const (
	// playbackCheckIntervalMs is how often, in milliseconds, Say polls the
	// player to see whether playback has finished.
	playbackCheckIntervalMs = 100
)

// addTTSFlags registers the text-to-speech command-line flags on the default
// pflag flag set and returns the options struct they populate. The returned
// values are only meaningful after the flags have been parsed.
//
// TODO: make most of this configurable via file
func addTTSFlags() *TTSOptions {
	ops := &TTSOptions{}
	flag.BoolVar(&ops.Disabled, "no-tts", false, "Disable text-to-speech.")
	flag.StringVar(&ops.Voice, "tts-voice", "en", "Which voice to use for TTS; see 'espeak --voices' for a full list of options.")
	flag.IntVar(&ops.Volume, "tts-volume", 100, "Text to speech volume")
	flag.IntVar(&ops.Pitch, "tts-pitch", 50, "Text to speech pitch")
	flag.IntVar(&ops.Range, "tts-range", 50, "Text to speech pitch range")
	// Bind to Speed, not Range: binding to Range left Speed at 0 and
	// overwrote the pitch-range default with 175.
	flag.IntVar(&ops.Speed, "tts-speed", 175, "Text to speech speaking speed (in words per minute)")
	return ops
}

// makeOtoContext creates the oto audio context used for playback and blocks
// until the context reports that it is ready.
func makeOtoContext() (*oto.Context, error) {
	// NOTE(review): these parameters are presumably chosen to match the audio
	// espeak-ng emits (22050 Hz, mono, signed 16-bit LE) -- confirm.
	op := &oto.NewContextOptions{
		SampleRate:   22050,
		ChannelCount: 1,
		Format:       oto.FormatSignedInt16LE,
	}

	otoCtx, readyChan, err := oto.NewContext(op)
	if err != nil {
		return nil, err
	}
	<-readyChan // wait for initialization

	return otoCtx, nil
}

// newTTS builds a TTS from ops. When ops.Disabled is set it returns
// (nil, nil); the methods on *TTS treat a nil receiver as "TTS disabled" and
// do nothing.
func newTTS(ops *TTSOptions) (*TTS, error) {
	if ops.Disabled {
		return nil, nil
	}

	otoCtx, err := makeOtoContext()
	if err != nil {
		return nil, err
	}

	return &TTS{
		options: ops,
		otoCtx:  otoCtx,
		phrases: make(map[string][]byte),
	}, nil
}

// AddMessage renders msg with espeak-ng and stores the resulting audio so
// that Say can play it later. A rendering failure is logged and the phrase is
// left unbuffered. Calling AddMessage on a nil *TTS is a no-op.
func (t *TTS) AddMessage(msg string) {
	if t == nil {
		// newTTS returns nil when TTS is disabled.
		return
	}

	// TODO: need to get lots of input validation in here

	// We execute `espeak-ng` directly because extant libraries produce
	// terrible output compared to the command-line utility. The rendered
	// audio is kept in t.phrases, keyed by the message text.
	cmd := exec.Command(
		"espeak-ng", "--stdout",
		"-v", t.options.Voice,
		"-a", strconv.Itoa(t.options.Volume),
		"-p", strconv.Itoa(t.options.Pitch),
		"-P", strconv.Itoa(t.options.Range),
		"-s", strconv.Itoa(t.options.Speed),
		msg,
	)

	wavData, err := cmd.Output()
	if err != nil {
		logger.LogError(err, "Failed to create TTS data")
		return
	}

	t.phrases[msg] = wavData
}

// Say plays the audio previously buffered for msg by AddMessage. Playback
// runs in its own goroutine, so Say returns immediately. It returns an error
// if msg has not been buffered. Calling Say on a nil *TTS does nothing and
// returns nil.
func (t *TTS) Say(msg string) error {
	if t == nil {
		// newTTS returns nil when TTS is disabled.
		return nil
	}

	buf, ok := t.phrases[msg]
	if !ok {
		return fmt.Errorf("tried to play non-buffered phrase '%s'", msg)
	}

	go func(buf []byte) {
		// NOTE(review): buf is espeak-ng's raw --stdout output; if that
		// includes a WAV header, the header bytes are fed to the player as
		// samples -- confirm whether it should be stripped.
		player := t.otoCtx.NewPlayer(bytes.NewBuffer(buf))

		volume := 0.0
		player.SetVolume(volume)
		player.Play()

		// Gradually ramp up the volume to avoid harsh clicks: steps of 0.01
		// with a 1ms sleep between them, clamped at full volume.
		for player.Volume() < 1.0 {
			volume += 0.01
			if volume > 1.0 {
				volume = 1.0
			}
			player.SetVolume(volume)
			time.Sleep(1 * time.Millisecond)
		}

		// Keep the goroutine (and with it the player) alive until playback
		// has finished.
		for player.IsPlaying() {
			time.Sleep(playbackCheckIntervalMs * time.Millisecond)
		}
	}(buf)

	return nil
}