Add text-to-speech support. (#13)
Reviewed-on: #13 Co-authored-by: Anna Rose Wiggins <annabunches@gmail.com> Co-committed-by: Anna Rose Wiggins <annabunches@gmail.com>
This commit is contained in:
parent
3e4367f5e7
commit
9d262977f9
8 changed files with 203 additions and 32 deletions
|
@ -2,23 +2,22 @@ package main
|
|||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/holoplot/go-evdev"
|
||||
flag "github.com/spf13/pflag"
|
||||
|
||||
"git.annabunches.net/annabunches/joyful/internal/config"
|
||||
"git.annabunches.net/annabunches/joyful/internal/logger"
|
||||
"git.annabunches.net/annabunches/joyful/internal/mappingrules"
|
||||
"git.annabunches.net/annabunches/joyful/internal/virtualdevice"
|
||||
"github.com/holoplot/go-evdev"
|
||||
)
|
||||
|
||||
func getConfigDir() string {
|
||||
configFlag := flag.String("config", "~/.config/joyful", "Directory to read configuration from.")
|
||||
flag.Parse()
|
||||
configDir := strings.ReplaceAll(*configFlag, "~", "${HOME}")
|
||||
// getConfigDir turns the configuration directory given on the command line
// into a usable path. A leading "~" (on its own or followed by "/") is
// replaced with the value of $HOME, and any environment variable references
// in the path are expanded.
func getConfigDir(dir string) string {
	// Only a leading tilde means "home directory"; a tilde anywhere else is an
	// ordinary filename character (e.g. "backup~") and must be left alone.
	if dir == "~" || strings.HasPrefix(dir, "~/") {
		dir = "${HOME}" + dir[1:]
	}
	return os.ExpandEnv(dir)
}
|
||||
|
||||
|
@ -62,27 +61,46 @@ func initPhysicalDevices(config *config.ConfigParser) map[string]*evdev.InputDev
|
|||
}
|
||||
|
||||
func main() {
|
||||
// parse command-line
|
||||
var configFlag string
|
||||
flag.BoolVarP(&logger.IsDebugMode, "debug", "d", false, "Output very verbose debug messages.")
|
||||
flag.StringVarP(&configFlag, "config", "c", "~/.config/joyful", "Directory to read configuration from.")
|
||||
ttsOps := addTTSFlags()
|
||||
flag.Parse()
|
||||
|
||||
// parse configs
|
||||
configDir := getConfigDir()
|
||||
configDir := getConfigDir(configFlag)
|
||||
config := readConfig(configDir)
|
||||
|
||||
// initialize TTS
|
||||
tts, err := newTTS(ttsOps)
|
||||
logger.LogIfError(err, "Failed to initialize TTS")
|
||||
|
||||
// Initialize virtual devices with event buffers
|
||||
vBuffersByName, vBuffersByDevice := initVirtualBuffers(config)
|
||||
|
||||
// Initialize physical devices
|
||||
pDevices := initPhysicalDevices(config)
|
||||
|
||||
// Load the rules
|
||||
rules, eventChannel, cancel, wg := loadRules(config, pDevices, getVirtualDevices(vBuffersByName))
|
||||
|
||||
// initialize the mode variable
|
||||
mode := config.GetModes()[0]
|
||||
|
||||
// initialize TTS phrases for modes
|
||||
for _, m := range config.GetModes() {
|
||||
tts.AddMessage(m)
|
||||
logger.LogDebugf("Added TTS message '%s'", m)
|
||||
}
|
||||
|
||||
fmt.Println("Joyful Running! Press Ctrl+C to quit. Press Enter to reload rules.")
|
||||
if len(config.GetModes()) > 1 {
|
||||
logger.Logf("Initial mode set to '%s'", mode)
|
||||
}
|
||||
|
||||
for {
|
||||
lastMode := mode
|
||||
// Get an event (blocks if necessary)
|
||||
channelEvent := <-eventChannel
|
||||
|
||||
|
@ -124,6 +142,10 @@ func main() {
|
|||
rules, eventChannel, cancel, wg = loadRules(config, pDevices, getVirtualDevices(vBuffersByName))
|
||||
fmt.Println("Config re-loaded. Only rule changes applied. Device and Mode changes require restart.")
|
||||
}
|
||||
|
||||
if lastMode != mode && tts != nil {
|
||||
tts.Say(mode)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
135
cmd/joyful/tts.go
Normal file
135
cmd/joyful/tts.go
Normal file
|
@ -0,0 +1,135 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"git.annabunches.net/annabunches/joyful/internal/logger"
|
||||
"github.com/ebitengine/oto/v3"
|
||||
flag "github.com/spf13/pflag"
|
||||
)
|
||||
|
||||
// TTSOptions holds the text-to-speech settings collected from the command
// line.
type TTSOptions struct {
	// Disabled turns text-to-speech off.
	Disabled bool
	// Voice is the espeak-ng voice name (its -v option).
	Voice string
	// Volume, Pitch, Range and Speed are passed to espeak-ng as its
	// -a, -p, -P and -s options respectively.
	Volume, Pitch, Range, Speed int
}
|
||||
|
||||
type TTS struct {
|
||||
options *TTSOptions
|
||||
otoCtx *oto.Context
|
||||
phrases map[string][]byte
|
||||
}
|
||||
|
||||
// playbackCheckIntervalMs is the delay, in milliseconds, between checks of
// whether a phrase has finished playing.
const playbackCheckIntervalMs = 100
|
||||
|
||||
// TODO: make most of this configurable via file
|
||||
func addTTSFlags() *TTSOptions {
|
||||
ops := &TTSOptions{}
|
||||
|
||||
flag.BoolVar(&ops.Disabled, "no-tts", false, "Disable text-to-speech.")
|
||||
flag.StringVar(&ops.Voice, "tts-voice", "en", "Which voice to use for TTS; see 'espeak --voices' for a full list of options.")
|
||||
flag.IntVar(&ops.Volume, "tts-volume", 100, "Text to speech volume")
|
||||
flag.IntVar(&ops.Pitch, "tts-pitch", 50, "Text to speech volume")
|
||||
flag.IntVar(&ops.Range, "tts-range", 50, "Text to speech volume")
|
||||
flag.IntVar(&ops.Range, "tts-speed", 175, "Text to speech speaking speed (in words per minute)")
|
||||
|
||||
return ops
|
||||
}
|
||||
|
||||
func makeOtoContext() (*oto.Context, error) {
|
||||
op := &oto.NewContextOptions{
|
||||
SampleRate: 22050,
|
||||
ChannelCount: 1,
|
||||
Format: oto.FormatSignedInt16LE,
|
||||
}
|
||||
|
||||
otoCtx, readyChan, err := oto.NewContext(op)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
<-readyChan // wait for initialization
|
||||
|
||||
return otoCtx, nil
|
||||
}
|
||||
|
||||
func newTTS(ops *TTSOptions) (*TTS, error) {
|
||||
if ops.Disabled {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
context, err := makeOtoContext()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &TTS{
|
||||
options: ops,
|
||||
otoCtx: context,
|
||||
phrases: make(map[string][]byte),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (t *TTS) AddMessage(msg string) {
|
||||
// TODO: need to get lots of input validation in here
|
||||
// We execute `espeak-ng` directly because extant libraries produce terrible output
|
||||
// compared to the command-line utility. This also gives us a chance to
|
||||
cmd := exec.Command(
|
||||
"espeak-ng", "--stdout",
|
||||
"-v", t.options.Voice,
|
||||
"-a", strconv.Itoa(t.options.Volume),
|
||||
"-p", strconv.Itoa(t.options.Pitch),
|
||||
"-P", strconv.Itoa(t.options.Range),
|
||||
"-s", strconv.Itoa(t.options.Speed),
|
||||
msg,
|
||||
)
|
||||
|
||||
wavData, err := cmd.Output()
|
||||
if err != nil {
|
||||
logger.LogError(err, "Failed to create TTS data")
|
||||
return
|
||||
}
|
||||
|
||||
t.phrases[msg] = wavData
|
||||
}
|
||||
|
||||
// "Say" generates TTS audio and plays it in a go routine
|
||||
func (t *TTS) Say(msg string) error {
|
||||
if _, ok := t.phrases[msg]; !ok {
|
||||
return fmt.Errorf("tried to play non-buffered phrase '%s'", msg)
|
||||
}
|
||||
|
||||
go func(buf []byte) {
|
||||
buffer := bytes.NewBuffer(buf)
|
||||
player := t.otoCtx.NewPlayer(buffer)
|
||||
|
||||
volume := 0.0
|
||||
player.SetVolume(volume)
|
||||
player.Play()
|
||||
|
||||
// Gradually ramp up the volume to avoid harsh clicks
|
||||
for player.Volume() < 1.0 {
|
||||
volume += 0.01
|
||||
if volume > 1.0 {
|
||||
volume = 1.0
|
||||
}
|
||||
|
||||
player.SetVolume(volume)
|
||||
time.Sleep(1 * time.Millisecond)
|
||||
}
|
||||
|
||||
for player.IsPlaying() {
|
||||
time.Sleep(playbackCheckIntervalMs * time.Millisecond)
|
||||
}
|
||||
}(t.phrases[msg])
|
||||
|
||||
return nil
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue