Add text-to-speech for announcing mode changes.

This commit is contained in:
Anna Rose Wiggins 2025-07-29 13:01:11 -04:00
parent 3e4367f5e7
commit 8262d78b54
8 changed files with 209 additions and 32 deletions

View file

@ -2,23 +2,22 @@ package main
import (
"context"
"flag"
"fmt"
"os"
"strings"
"sync"
"github.com/holoplot/go-evdev"
flag "github.com/spf13/pflag"
"git.annabunches.net/annabunches/joyful/internal/config"
"git.annabunches.net/annabunches/joyful/internal/logger"
"git.annabunches.net/annabunches/joyful/internal/mappingrules"
"git.annabunches.net/annabunches/joyful/internal/virtualdevice"
"github.com/holoplot/go-evdev"
)
func getConfigDir() string {
configFlag := flag.String("config", "~/.config/joyful", "Directory to read configuration from.")
flag.Parse()
configDir := strings.ReplaceAll(*configFlag, "~", "${HOME}")
// getConfigDir resolves the user-supplied configuration directory to a
// concrete path.
//
// A leading "~" (either the whole string or followed by "/") is treated as
// the current user's home directory and rewritten to "${HOME}". The result is
// then passed through os.ExpandEnv, so environment variable references in dir
// are expanded as well.
//
// Only a leading tilde is expanded: a "~" elsewhere in the path is a regular
// filename character and is left untouched.
func getConfigDir(dir string) string {
	if dir == "~" || strings.HasPrefix(dir, "~/") {
		dir = "${HOME}" + dir[1:]
	}
	return os.ExpandEnv(dir)
}
@ -62,16 +61,31 @@ func initPhysicalDevices(config *config.ConfigParser) map[string]*evdev.InputDev
}
func main() {
// parse command-line
var configFlag string
var ttsVoiceFlag string
var ttsFlag bool
flag.StringVarP(&configFlag, "config", "c", "~/.config/joyful", "Directory to read configuration from.")
addTTSFlags(&ttsFlag, &ttsVoiceFlag)
flag.Parse()
// parse configs
configDir := getConfigDir()
configDir := getConfigDir(configFlag)
config := readConfig(configDir)
tts, err := newTTS(ttsFlag, ttsVoiceFlag)
logger.LogIfError(err, "Failed to initialize TTS")
if tts != nil {
defer tts.Cleanup()
}
// Initialize virtual devices with event buffers
vBuffersByName, vBuffersByDevice := initVirtualBuffers(config)
// Initialize physical devices
pDevices := initPhysicalDevices(config)
// Load the rules
rules, eventChannel, cancel, wg := loadRules(config, pDevices, getVirtualDevices(vBuffersByName))
// initialize the mode variable
@ -83,6 +97,7 @@ func main() {
}
for {
lastMode := mode
// Get an event (blocks if necessary)
channelEvent := <-eventChannel
@ -124,6 +139,10 @@ func main() {
rules, eventChannel, cancel, wg = loadRules(config, pDevices, getVirtualDevices(vBuffersByName))
fmt.Println("Config re-loaded. Only rule changes applied. Device and Mode changes require restart.")
}
if lastMode != mode && tts != nil {
tts.Say(mode)
}
}
}

106
cmd/joyful/tts.go Normal file
View file

@ -0,0 +1,106 @@
//go:build !notts
package main
import (
"bytes"
"io"
"os"
"time"
"git.annabunches.net/annabunches/joyful/internal/logger"
"github.com/amitybell/piper"
asset "github.com/amitybell/piper-asset"
alan "github.com/amitybell/piper-voice-alan"
jenny "github.com/amitybell/piper-voice-jenny"
"github.com/ebitengine/oto/v3"
flag "github.com/spf13/pflag"
)
// TTS speaks short messages aloud. It embeds a piper.TTS and adds the state
// needed to play the result and to clean up afterwards.
type TTS struct {
	// Embedded piper engine; its methods (e.g. Synthesize) are promoted onto TTS.
	piper.TTS
	// dataDir is the temporary directory handed to piper; Cleanup removes it.
	dataDir string
	// otoCtx is the audio context from which Say creates players.
	otoCtx *oto.Context
}
// Playback tuning values used by Say.
const (
	// playbackCheckIntervalMs is how long, in milliseconds, Say sleeps between
	// checks of whether the player is still playing.
	playbackCheckIntervalMs = 250
	// playbackSeekOffsetBytes is how many bytes of the synthesized audio are
	// skipped before playback starts.
	playbackSeekOffsetBytes = 1024
)
// addTTSFlags registers the text-to-speech command-line flags.
//
// ttsFlag is bound to "notts" (default false); note that, despite the
// parameter's name, a true value means TTS is disabled. ttsVoiceFlag is bound
// to "voice" (default "alan"). The caller is responsible for calling
// flag.Parse afterwards.
func addTTSFlags(ttsFlag *bool, ttsVoiceFlag *string) {
	const (
		disableUsage = "Disable text-to-speech on mode change."
		defaultVoice = "alan"
		voiceUsage   = "Which voice to use for TTS; must be 'alan' or 'jenny'"
	)

	flag.BoolVar(ttsFlag, "notts", false, disableUsage)
	flag.StringVar(ttsVoiceFlag, "voice", defaultVoice, voiceUsage)
}
// newTTS builds the text-to-speech engine.
//
// When disable is true it does nothing and returns (nil, nil); callers must
// check for a nil *TTS before use. voice selects the embedded piper voice:
// "jenny" picks the jenny asset, and "alan" or any other value picks alan.
//
// On success the returned TTS owns a temporary data directory; call Cleanup
// to remove it. On failure the directory is removed here before the error is
// returned, so nothing is left behind.
func newTTS(disable bool, voice string) (*TTS, error) {
	if disable {
		return nil, nil
	}

	dataDir, err := os.MkdirTemp("", "joyful-piper.")
	if err != nil {
		return nil, err
	}

	// Until construction has fully succeeded, any early return must not leak
	// the temporary directory.
	succeeded := false
	defer func() {
		if !succeeded {
			// Best effort: the construction error is the one worth reporting.
			_ = os.RemoveAll(dataDir)
		}
	}()

	var ass asset.Asset
	switch voice {
	case "jenny":
		ass = jenny.Asset
	default:
		// "alan" and any unrecognised name both use the alan voice.
		ass = alan.Asset
	}

	pTTS, err := piper.NewEmbedded(dataDir, ass)
	if err != nil {
		return nil, err
	}

	// NOTE(review): these values presumably match the PCM format piper
	// produces for the bundled voices — confirm if voices are added.
	op := &oto.NewContextOptions{
		SampleRate:   22050,
		ChannelCount: 1,
		Format:       oto.FormatSignedInt16LE,
	}
	otoCtx, readyChan, err := oto.NewContext(op)
	if err != nil {
		return nil, err
	}
	<-readyChan // wait for initialization

	succeeded = true
	return &TTS{
		TTS:     *pTTS,
		dataDir: dataDir,
		otoCtx:  otoCtx,
	}, nil
}
// Say synthesizes msg to audio and plays it. The work happens in a new
// goroutine, so Say returns immediately; failures are logged rather than
// returned to the caller.
func (t *TTS) Say(msg string) {
	go func() {
		wav, err := t.Synthesize(msg)
		if err != nil {
			logger.LogError(err, "Failed to synthesize TTS audio")
			return
		}

		player := t.otoCtx.NewPlayer(bytes.NewReader(wav))

		// We seek some bytes into the generated audio because there's a click
		// and a long delay at the beginning of the data. A failed seek is not
		// fatal: log it and play the audio from wherever the player is.
		if _, err := player.Seek(playbackSeekOffsetBytes, io.SeekStart); err != nil {
			logger.LogError(err, "Failed to skip the start of TTS audio")
		}

		player.Play()

		// Poll until playback ends before letting the goroutine exit.
		// NOTE(review): presumably this also keeps the player referenced while
		// it is still playing — confirm against the oto documentation.
		for player.IsPlaying() {
			time.Sleep(playbackCheckIntervalMs * time.Millisecond)
		}
	}()
}
// Cleanup removes the temporary data directory held by t, together with
// everything inside it. A failure to remove it is logged, not returned.
func (t *TTS) Cleanup() {
	if err := os.RemoveAll(t.dataDir); err != nil {
		logger.LogError(err, "Failed to remove TTS data directory")
	}
}

16
cmd/joyful/tts_stub.go Normal file
View file

@ -0,0 +1,16 @@
//go:build notts
package main
// Speaker is the text-to-speech surface exposed in builds with the "notts"
// tag, where no real engine is compiled in.
type Speaker interface {
	// Say takes the message to be spoken.
	Say(string)
	// Cleanup takes no arguments and returns nothing.
	Cleanup()
}
// newTTS is the "notts" build's constructor. Both arguments are ignored and
// it always yields a nil Speaker with a nil error, so callers must check the
// result for nil before using it.
func newTTS(_ bool, _ string) (Speaker, error) {
	var none Speaker // zero value of an interface is nil
	return none, nil
}
// addTTSFlags is the "notts" build's counterpart of the flag registration
// hook. It registers no flags and leaves both targets untouched.
func addTTSFlags(ttsFlag *bool, ttsVoiceFlag *string) {
	// Intentionally empty: there is no text-to-speech in this build.
}