Compare commits

...

3 Commits

9 changed files with 46 additions and 27 deletions

View File

@ -8,13 +8,15 @@ Personal voice assistant written in go using DeepSpeech. This was mainly created
The prerequisites for trident are:
- libdeepspeech and DeepSpeech models (speech to text)
- mimic (text to speech)
- flite (text to speech)
- go
`libdeepspeech` along with its models can be found in [its GitHub releases](https://github.com/mozilla/DeepSpeech/releases/). Be sure to download the `native_client` tarball for your platform, as well as the `.pbmm` and `.scorer` model files.
`mimic` can be installed via an [install script](https://github.com/MycroftAI/mycroft-core/blob/dev/scripts/install-mimic.sh) or, on Arch Linux and its derivatives, via its AUR package.
`flite` can be installed via your distribution's repositories:
- Debian/Ubuntu: `sudo apt install flite1-dev`
- Fedora: `sudo dnf install flite-devel`
- Arch: `sudo pacman -S flite festival-us`
### Installation
Move the previously downloaded models (`.pbmm` and `.scorer`) into this repo as `deepspeech.pbmm` and `deepspeech.scorer`. Then, follow the next steps.

View File

@ -84,11 +84,16 @@ func playActivationTone(ctx *malgo.AllocatedContext) error {
// Create new channel waiting for completion
done := make(chan bool)
doneVar := false
onSamples := func(output, _ []byte, _ uint32) {
// Read as much audio into output as will fit
n, err := io.ReadFull(wavReader, output)
// If error occurred or no bytes read
if err != nil || n == 0 {
if !doneVar && (err != nil || n == 0) {
if *verbose {
log.Debug().Msg("Sample output complete")
}
doneVar = true
// Signal completion
done <- true
}

View File

@ -93,4 +93,4 @@ func configEnv() (gopath, configDir, execDir, confPath string) {
}
// Return all variables
return
}
}

1
go.mod
View File

@ -4,6 +4,7 @@ go 1.16
require (
github.com/asticode/go-astideepspeech v0.10.0
github.com/gen2brain/flite-go v0.0.0-20170519100317-f4df2119132c
github.com/gen2brain/malgo v0.10.29
github.com/pelletier/go-toml v1.9.0
github.com/rs/zerolog v1.21.0

2
go.sum
View File

@ -5,6 +5,8 @@ github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7
github.com/cryptix/wav v0.0.0-20180415113528-8bdace674401/go.mod h1:knK8fd+KPlGGqSUWogv1DQzGTwnfUvAi0cIoWyOG7+U=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/gen2brain/flite-go v0.0.0-20170519100317-f4df2119132c h1:JBlwZJSYopoPXh0dLN9GGw750uhU08VjKKpl+uX5pE4=
github.com/gen2brain/flite-go v0.0.0-20170519100317-f4df2119132c/go.mod h1:Wv0H30ZpZPf4CrBNqgiG2S4G0CDtZWS2i87JnPtv9LI=
github.com/gen2brain/malgo v0.10.29 h1:bTYiUTUKJsEomNby+W0hgyLrOttUXIk4lTEnKA54iqM=
github.com/gen2brain/malgo v0.10.29/go.mod h1:zHSUNZAXfCeNsZou0RtQ6Zk7gDYLIcKOrUWtAdksnEs=
github.com/pelletier/go-toml v1.9.0 h1:NOd0BRdOKpPf0SxkL3HxSQOG7rNh+4kl6PHcBPFs7Q0=

View File

@ -45,6 +45,7 @@ func main() {
// Define and parse command line flags
tfLogLevel := flag.Int("tf-log-level", 2, "Log level for TensorFlow")
verbose = flag.BoolP("verbose", "v", false, "Log more events")
showDecode := flag.BoolP("show-decode", "d", false, "Show text to speech decodes")
configPath := flag.StringP("config", "c", confPath, "Location of trident TOML config")
modelPath := flag.StringP("model", "m", filepath.Join(execDir, "deepspeech.pbmm"), "Path to DeepSpeech model")
scorerPath := flag.StringP("scorer", "s", filepath.Join(execDir, "deepspeech.scorer"), "Path to DeepSpeech scorer")
@ -207,7 +208,7 @@ func main() {
// Create goroutine to clean stream every minute
go func() {
for {
time.Sleep(time.Minute)
time.Sleep(20 * time.Second)
// Lock mutex of stream
safeStream.Lock()
// Reset stream and buffer
@ -223,7 +224,7 @@ func main() {
var tts string
listenForActivation := true
for {
time.Sleep(time.Second)
time.Sleep(200 * time.Millisecond)
// Convert captured raw audio to slice of int16
slice, err := convToInt16Slice(captured)
if err != nil {
@ -240,6 +241,9 @@ func main() {
if err != nil {
log.Fatal().Err(err).Msg("Error intermediate decoding stream")
}
if *showDecode {
log.Debug().Msg("TTS Decode: " + tts)
}
// If decoded string contains activation phrase and listenForActivation is true
if strings.Contains(tts, config.ActivationPhrase) && listenForActivation {
// Play activation tone

View File

@ -100,4 +100,3 @@ func initPlugins(gopath string) map[string]pluginFunc {
}
return out
}

View File

@ -21,18 +21,21 @@ package shell
import (
"os"
"os/exec"
"trident"
)
func RunPlugin(program string, data map[string]interface{}) {
var shell string
var ok bool
// Attempt to get shell from config, asserting as string
shell, ok = data["shell"].(string)
shell, ok := data["shell"].(string)
// If unsuccessful
if !ok {
// Set shell to default (/bin/sh)
shell = "/bin/sh"
}
sayOutput, ok := data["sayOutput"].(bool)
if !ok {
sayOutput = false
}
// Create command using configured shell or default (/bin/sh)
cmd := exec.Command(shell, "-c", program)
// Set command environment to system environment
@ -40,5 +43,8 @@ func RunPlugin(program string, data map[string]interface{}) {
// Set command's standard error to system standard error
cmd.Stderr = os.Stderr
// Run command, ignoring error
_ = cmd.Run()
output, _ := cmd.Output()
if sayOutput {
trident.Say(string(output))
}
}

View File

@ -18,28 +18,28 @@
package main
import (
"github.com/gen2brain/flite-go"
"github.com/traefik/yaegi/interp"
"os/exec"
"reflect"
)
// tridentSymbols is the symbol table for a custom "trident" package, built as
// yaegi interp.Exports. It exposes the text-to-speech helpers Say and
// SayWithVoice; presumably it is handed to the interpreter that runs plugins
// so they can `import "trident"` (confirm at the interp.Use call site).
//
// Each exported name must appear exactly once: Go rejects duplicate constant
// keys in a map literal at compile time.
var tridentSymbols = interp.Exports{"trident": {
	"Say":          reflect.ValueOf(Say),
	"SayWithVoice": reflect.ValueOf(SayWithVoice),
}}
// Function to say text using mimic text-to-speech
func Say(text string, args ...string) {
// If mimic exists in PATH
if _, err := exec.LookPath("mimic"); err == nil {
// Set initial argument slice to contain text
argSlice := []string{"-t", text}
// Add any additional arguments to slice
argSlice = append(argSlice, args...)
// Create and run command
exec.Command("mimic", argSlice...).Run()
} else {
// If mimic does not exist in PATH, warn user
log.Warn().Err(err).Str("text", text).Msg("Cannot perform text to speech")
// Say speaks text aloud using flite's "slt" voice.
//
// Say has no error return, so if the voice cannot be selected it does nothing
// rather than passing an unusable voice on to flite. Callers that need to know
// why speech failed should use SayWithVoice, which reports the error.
func Say(text string) {
	fliteVoice, err := flite.VoiceSelect("slt")
	if err != nil {
		// The voice is meaningless when err is non-nil; skip synthesis
		// instead of handing it to the flite bindings.
		return
	}
	// "play" is the flite output type used throughout this file; presumably
	// it sends the audio to the default audio device (confirm in flite-go docs).
	flite.TextToSpeech(text, fliteVoice, "play")
}
func SayWithVoice(text, voice string) error {
fliteVoice, err := flite.VoiceSelect(voice)
if err != nil {
return err
}
}
flite.TextToSpeech(text, fliteVoice, "play")
return nil
}