diff --git a/cmd/joyful/main.go b/cmd/joyful/main.go index 9904cb6..17482bf 100644 --- a/cmd/joyful/main.go +++ b/cmd/joyful/main.go @@ -63,21 +63,18 @@ func initPhysicalDevices(config *config.ConfigParser) map[string]*evdev.InputDev func main() { // parse command-line var configFlag string - var ttsVoiceFlag string - var ttsFlag bool + flag.BoolVarP(&logger.IsDebugMode, "debug", "d", false, "Output very verbose debug messages.") flag.StringVarP(&configFlag, "config", "c", "~/.config/joyful", "Directory to read configuration from.") - addTTSFlags(&ttsFlag, &ttsVoiceFlag) + ttsOps := addTTSFlags() flag.Parse() // parse configs configDir := getConfigDir(configFlag) config := readConfig(configDir) - tts, err := newTTS(ttsFlag, ttsVoiceFlag) + // initialize TTS + tts, err := newTTS(ttsOps) logger.LogIfError(err, "Failed to initialize TTS") - if tts != nil { - defer tts.Cleanup() - } // Initialize virtual devices with event buffers vBuffersByName, vBuffersByDevice := initVirtualBuffers(config) @@ -91,6 +88,14 @@ func main() { // initialize the mode variable mode := config.GetModes()[0] + // initialize TTS phrases for modes; tts is nil when TTS is disabled or newTTS failed, so guard the calls + if tts != nil { + for _, m := range config.GetModes() { + tts.AddMessage(m) + logger.LogDebugf("Added TTS message '%s'", m) + } + } + fmt.Println("Joyful Running! Press Ctrl+C to quit. 
Press Enter to reload rules.") if len(config.GetModes()) > 1 { logger.Logf("Initial mode set to '%s'", mode) diff --git a/cmd/joyful/tts.go b/cmd/joyful/tts.go index 95340d8..99b709d 100644 --- a/cmd/joyful/tts.go +++ b/cmd/joyful/tts.go @@ -1,65 +1,51 @@ -//go:build !notts - package main import ( "bytes" - "io" - "os" + "fmt" + "os/exec" + "strconv" "time" "git.annabunches.net/annabunches/joyful/internal/logger" - "github.com/amitybell/piper" - asset "github.com/amitybell/piper-asset" - alan "github.com/amitybell/piper-voice-alan" - jenny "github.com/amitybell/piper-voice-jenny" "github.com/ebitengine/oto/v3" flag "github.com/spf13/pflag" ) +type TTSOptions struct { + Disabled bool + Voice string + Volume int + Pitch int + Range int + Speed int +} + type TTS struct { - piper.TTS - dataDir string + options *TTSOptions otoCtx *oto.Context + phrases map[string][]byte } const ( - playbackCheckIntervalMs = 250 - playbackSeekOffsetBytes = 1024 + playbackCheckIntervalMs = 100 ) -func addTTSFlags(ttsFlag *bool, ttsVoiceFlag *string) { - flag.BoolVar(ttsFlag, "notts", false, "Disable text-to-speech on mode change.") - flag.StringVar(ttsVoiceFlag, "voice", "alan", "Which voice to use for TTS; must be 'alan' or 'jenny'") +// addTTSFlags registers the TTS command-line flags and returns the options struct they populate once flag.Parse has run. TODO: make most of this configurable via file +func addTTSFlags() *TTSOptions { + ops := &TTSOptions{} + flag.BoolVar(&ops.Disabled, "no-tts", false, "Disable text-to-speech.") + flag.StringVar(&ops.Voice, "tts-voice", "en", "Which voice to use for TTS; see 'espeak-ng --voices' for a full list of options.") + flag.IntVar(&ops.Volume, "tts-volume", 100, "Text to speech volume") + flag.IntVar(&ops.Pitch, "tts-pitch", 50, "Text to speech pitch") + flag.IntVar(&ops.Range, "tts-range", 50, "Text to speech pitch range") + flag.IntVar(&ops.Speed, "tts-speed", 175, "Text to speech speaking speed (in words per minute)") + + return ops } -func newTTS(disable bool, voice string) (*TTS, error) { - if disable { - return nil, nil - } - - dataDir, err := os.MkdirTemp("", 
"joyful-piper.") - if err != nil { - return nil, err - } - - var ass asset.Asset - switch voice { - case "jenny": - ass = jenny.Asset - case "alan": - ass = alan.Asset - default: - ass = alan.Asset - } - - pTTS, err := piper.NewEmbedded(dataDir, ass) - - if err != nil { - return nil, err - } - +func makeOtoContext() (*oto.Context, error) { op := &oto.NewContextOptions{ SampleRate: 22050, ChannelCount: 1, @@ -72,35 +58,85 @@ func newTTS(disable bool, voice string) (*TTS, error) { } <-readyChan // wait for initialization + return otoCtx, nil +} + +func newTTS(ops *TTSOptions) (*TTS, error) { + if ops.Disabled { + return nil, nil + } + + context, err := makeOtoContext() + if err != nil { + return nil, err + } + return &TTS{ - TTS: *pTTS, - dataDir: dataDir, - otoCtx: otoCtx, + options: ops, + otoCtx: context, + phrases: make(map[string][]byte), }, nil } -// "Say" generates TTS audio and plays it in a go routine -func (t *TTS) Say(msg string) { - go func() { - wav, err := t.Synthesize(msg) +// AddMessage synthesizes msg with espeak-ng and caches the audio so that Say can play it later. +// It is a no-op on a nil *TTS, which is what newTTS returns when TTS is disabled. +func (t *TTS) AddMessage(msg string) { + if t == nil { + return + } + + // TODO: need to get lots of input validation in here + // We execute `espeak-ng` directly because extant libraries produce terrible output + // compared to the command-line utility. 
Synthesizing each phrase once, up front, + // also means Say only has to play back audio that is already in memory. + cmd := exec.Command( + "espeak-ng", "--stdout", + "-v", t.options.Voice, + "-a", strconv.Itoa(t.options.Volume), + "-p", strconv.Itoa(t.options.Pitch), + "-P", strconv.Itoa(t.options.Range), + "-s", strconv.Itoa(t.options.Speed), + msg, + ) - if err != nil { - logger.LogError(err, "") - return + wavData, err := cmd.Output() + if err != nil { + logger.LogError(err, "Failed to create TTS data") + return + } + + t.phrases[msg] = wavData +} + +// Say plays the audio previously cached for msg by AddMessage in a goroutine; it returns an error if msg was never cached. +func (t *TTS) Say(msg string) error { + if _, ok := t.phrases[msg]; !ok { + return fmt.Errorf("tried to play non-buffered phrase '%s'", msg) + } + + go func(buf []byte) { + buffer := bytes.NewBuffer(buf) + player := t.otoCtx.NewPlayer(buffer) + + volume := 0.0 + player.SetVolume(volume) + player.Play() + + // Gradually ramp up the volume to avoid harsh clicks + for player.Volume() < 1.0 { + volume += 0.01 + if volume > 1.0 { + volume = 1.0 + } + + player.SetVolume(volume) + time.Sleep(1 * time.Millisecond) } - wavReader := bytes.NewReader(wav) - player := t.otoCtx.NewPlayer(wavReader) - // We seek some bytes into the generated audio because there's a click - // and a long delay at the beginning of the data. 
- player.Seek(playbackSeekOffsetBytes, io.SeekStart) - player.Play() for player.IsPlaying() { time.Sleep(playbackCheckIntervalMs * time.Millisecond) } - }() -} + }(t.phrases[msg]) -func (t *TTS) Cleanup() { - os.RemoveAll(t.dataDir) + return nil } diff --git a/cmd/joyful/tts_stub.go b/cmd/joyful/tts_stub.go deleted file mode 100644 index 170674a..0000000 --- a/cmd/joyful/tts_stub.go +++ /dev/null @@ -1,16 +0,0 @@ -//go:build notts - -package main - -type Speaker interface { - Say(string) - Cleanup() -} - -func newTTS(_ bool, _ string) (Speaker, error) { - return nil, nil -} - -func addTTSFlags(ttsFlag *bool, ttsVoiceFlag *string) { - return -} diff --git a/go.mod b/go.mod index 5d58c6e..5007672 100644 --- a/go.mod +++ b/go.mod @@ -3,10 +3,6 @@ module git.annabunches.net/annabunches/joyful go 1.24.4 require ( - github.com/amitybell/piper v0.0.0-20250621082041-2bb74e3a4a55 - github.com/amitybell/piper-asset v0.0.0-20231030194325-d36a29e3b1fd - github.com/amitybell/piper-voice-alan v0.0.0-20231118093148-059963c24dbd - github.com/amitybell/piper-voice-jenny v0.0.0-20231118093224-dcf0d49e46b7 github.com/ebitengine/oto/v3 v3.3.3 github.com/goccy/go-yaml v1.18.0 github.com/holoplot/go-evdev v0.0.0-20240306072622-217e18f17db1 @@ -17,12 +13,8 @@ require ( ) require ( - github.com/adrg/xdg v0.5.3 // indirect - github.com/amitybell/piper-bin-linux v0.0.0-20250621082830-f5d5d85fa076 // indirect - github.com/amitybell/piper-bin-windows v0.0.0-20231118093113-cc2cef2f6b74 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/ebitengine/purego v0.8.4 // indirect - github.com/klauspost/compress v1.18.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/stretchr/objx v0.5.2 // indirect golang.org/x/sys v0.34.0 // indirect diff --git a/go.sum b/go.sum index e756701..70e03cc 100644 --- a/go.sum +++ b/go.sum @@ -1,19 +1,3 @@ -github.com/adrg/xdg v0.5.3 h1:xRnxJXne7+oWDatRhR1JLnvuccuIeCoBu2rtuLqQB78= -github.com/adrg/xdg v0.5.3/go.mod 
h1:nlTsY+NNiCBGCK2tpm09vRqfVzrc2fLmXGpBLF0zlTQ= -github.com/amitybell/piper v0.0.0-20250621082041-2bb74e3a4a55 h1:8MKEDgDBbYKAphlRUcAEHT1Uam7xBjA5E/SmGHhNH10= -github.com/amitybell/piper v0.0.0-20250621082041-2bb74e3a4a55/go.mod h1:y0aDZdCM3erPmpX+rDGoF0O2ZdCqZvAxNjYUPrK/O7U= -github.com/amitybell/piper-asset v0.0.0-20231030194325-d36a29e3b1fd h1:4MLHn2cCVhzhPLlPO6946h1S0yk3o7Ry1831DEa5EcE= -github.com/amitybell/piper-asset v0.0.0-20231030194325-d36a29e3b1fd/go.mod h1:MiDKnt4NenfcrsVxYAxQW0nu4zjFYQPjGzzLB5MvOz8= -github.com/amitybell/piper-bin-linux v0.0.0-20250621082830-f5d5d85fa076 h1:aST7iEpuMr507piwgx0WNDezW6ycWIE+ejtnXXaMgI0= -github.com/amitybell/piper-bin-linux v0.0.0-20250621082830-f5d5d85fa076/go.mod h1:dVR33O0l/AFgQNmZfywfgNZ6qlpCKPhLnn9UpeMMLdM= -github.com/amitybell/piper-bin-windows v0.0.0-20231118093113-cc2cef2f6b74 h1:T5hXX0Z2JaE5gtZ7LScjG0r0BmDk0+FWlzyZ2b1nboo= -github.com/amitybell/piper-bin-windows v0.0.0-20231118093113-cc2cef2f6b74/go.mod h1:5Ea0Pc0QdO8FeriIXcqZtHViM2fi589jtFubrjaAk6w= -github.com/amitybell/piper-voice-alan v0.0.0-20231118093148-059963c24dbd h1:DsXuiWSHsbBkVNL7cBAdXD95kNwrE0Ck05OasSeUZ4g= -github.com/amitybell/piper-voice-alan v0.0.0-20231118093148-059963c24dbd/go.mod h1:5ghO6mSctWNXfDoh3r46HQEMIcPr5DqE5TMYfp5hskY= -github.com/amitybell/piper-voice-jenny v0.0.0-20231030195502-2afb5ebf3c45 h1:V/HZAQuprvdo0xXToxAuTLSwD3YrqRpDZLVBOOD+2aE= -github.com/amitybell/piper-voice-jenny v0.0.0-20231030195502-2afb5ebf3c45/go.mod h1:eKG2Bo69QGTVKKKKApafZr+4v4zk40jYNijh0s8/PzU= -github.com/amitybell/piper-voice-jenny v0.0.0-20231118093224-dcf0d49e46b7 h1:GMYJcgP1OKBMBuQfP7r0aRk4PS0AaviHVTERtdt/e/o= -github.com/amitybell/piper-voice-jenny v0.0.0-20231118093224-dcf0d49e46b7/go.mod h1:eKG2Bo69QGTVKKKKApafZr+4v4zk40jYNijh0s8/PzU= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/ebitengine/oto/v3 v3.3.3 
h1:m6RV69OqoXYSWCDsHXN9rc07aDuDstGHtait7HXSM7g= @@ -26,8 +10,6 @@ github.com/holoplot/go-evdev v0.0.0-20240306072622-217e18f17db1 h1:92OsBIf5KB1Ta github.com/holoplot/go-evdev v0.0.0-20240306072622-217e18f17db1/go.mod h1:iHAf8OIncO2gcQ8XOjS7CMJ2aPbX2Bs0wl5pZyanEqk= github.com/jonboulle/clockwork v0.5.0 h1:Hyh9A8u51kptdkR+cqRpT1EebBwTn1oK9YfGYbdFz6I= github.com/jonboulle/clockwork v0.5.0/go.mod h1:3mZlmanh0g2NDKO5TWZVJAfofYk64M7XN3SzBPjZF60= -github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= -github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/spf13/pflag v1.0.7 h1:vN6T9TfwStFPFM5XzjsvmzZkLuaLX+HS+0SeFLRgU6M= diff --git a/internal/logger/logger.go b/internal/logger/logger.go index 35fc11e..be04b74 100644 --- a/internal/logger/logger.go +++ b/internal/logger/logger.go @@ -5,6 +5,8 @@ import ( "os" ) +var IsDebugMode = false + func Log(msg string) { fmt.Println(msg) } @@ -13,6 +15,12 @@ func Logf(msg string, params ...interface{}) { fmt.Printf(msg+"\n", params...) } +func LogDebugf(msg string, params ...interface{}) { + if IsDebugMode { + fmt.Printf("DEBUG: %s\n", fmt.Sprintf(msg, params...)) + } +} + func LogError(err error, msg string) { if msg == "" { fmt.Printf("%s\n", err.Error()) diff --git a/readme.md b/readme.md index fbc5444..5c94306 100644 --- a/readme.md +++ b/readme.md @@ -20,6 +20,7 @@ Joyful is ideal for Linux gamers who enjoy space and flight sims and miss the fe * Axis -> Relative Axis mapping, for converting a joystick axis to mouse movement and scrollwheel events. * Configure per-rule configurable deadzones for axes, with multiple ways to specify deadzones. * Define multiple modes with per-mode behavior. + * Text-to-speech engine that announces the current mode when it changes. 
### Possible Future Features @@ -47,23 +48,18 @@ Pressing `` in the running terminal window will reload the `rules` sectio ## Build & Install -To build joyful, first use your distribution's package manager to install `go` and `alsa-lib` (this may be `libasound2-dev` or `libasound2-devel` depending on your distribution), then run: +To build joyful, first use your distribution's package manager to install the following packages: +* `go` +* `alsa-lib` - this may be `libasound2-dev` or `libasound2-devel` depending on your distribution +* `espeak-ng` - if you want text-to-speech to announce mode changes + +Then, run: ``` go build -o build/ ./... ``` -Next, copy the binaries in the `build/` directory to somewhere in your `$PATH`. (details depend on your setup, but typically somewhere like `/usr/local/bin` or `~/bin`) - -### Machine Learning Disclosure - -Joyful's text-to-speech support is dependent on [Piper](https://github.com/rhasspy/piper), which uses an offline Machine Learning (ML) model for speech synthesis. The project authors are extremely skeptical of ML/AI technologies in general, but consider speech synthesis, especially offline/local speech synthesis, to be one of the most defensible use cases for it. Since it is very difficult to find text-to-speech systems that don't use ML under the hood (especially that have extant golang wrappers or bindings), this is considered a necessary tradeoff. - -However, if you don't want any ML running on your system, you can optionally choose to skip TTS support at compile-time by building with this command: - -``` -go build -o build -tags notts ./... -``` +Finally, copy the files in the `build/` directory to somewhere in your `$PATH`. (details depend on your setup, but typically somewhere like `/usr/local/bin` or `~/bin`) ## Technical details