A Go library designed to seamlessly integrate with Asterisk's EAGI, offering essential functionalities for enhanced interaction and communication.
- Audio Streaming
- Google's Text to Speech
- Google's Speech to Text
- Microsoft Azure's Speech to Text
- Vosk server Speech to Text
- Voice Activity Detection
- Speech File Generation
- Commands to Asterisk
- Render text to speech and play it back to the user.
- You may refer to the language code and voice name here.
- Example dialplan code:
;GoogleTTS, playback message to the user
exten => 1234,1,Answer
exten => 1234,n,AGI(<build-script>, "What's up my buddy? how are you?", "en-GB", "en-GB-Neural2-A")
exten => 1234,n,Hangup
- Example Go code:
package main
import (
	"fmt"
	"os"
	"strings"

	"github.com/andrewyang17/goEagi"
)
// main renders the text passed in by the dialplan to speech with Google TTS
// and plays the generated audio back to the caller.
//
// The AGI arguments are read from the EAGI environment: arg_1 is the text to
// speak, arg_2 the language code and arg_3 the voice name.
func main() {
	eagi, err := goEagi.New()
	if err != nil {
		// eagi cannot be relied on when New fails, so report on stdout directly.
		fmt.Fprintf(os.Stdout, "error: %v", err)
		os.Exit(1)
	}

	content := strings.TrimSpace(eagi.Env["arg_1"])
	languageCode := strings.TrimSpace(eagi.Env["arg_2"])
	voiceName := strings.TrimSpace(eagi.Env["arg_3"])

	tts, err := goEagi.NewGoogleTTS(
		"<GoogleSpeechToTextPrivateKey>",
		"/tmp/tts",
		languageCode,
		voiceName)
	if err != nil {
		// Without a usable TTS client nothing below can succeed; stop here
		// instead of calling methods on an invalid value.
		eagi.Verbose(err.Error())
		os.Exit(1)
	}

	audioPath, err := tts.GenerateAudio(content)
	if err != nil {
		// No audio was produced, so there is nothing to stream.
		eagi.Verbose(err.Error())
		os.Exit(1)
	}

	if _, err = eagi.StreamFile(audioPath, ""); err != nil {
		eagi.Verbose(err.Error())
	}
}
package main
import (
"context"
"fmt"
"github.com/andrewyang17/goEagi"
)
// main streams the caller's audio to Google Speech to Text and logs every
// transcription it receives through eagi.Verbose.
//
// NOTE(review): this example uses os.Stdout and os.Exit, so "os" must also be
// listed in this example's import block.
func main() {
	eagi, err := goEagi.New()
	if err != nil {
		// eagi cannot be relied on when New fails, so report on stdout directly.
		fmt.Fprintf(os.Stdout, "error: %v", err)
		os.Exit(1)
	}

	googleService, err := goEagi.NewGoogleService("<GoogleSpeechToTextPrivateKey>", "<languageCode>", nil)
	if err != nil {
		eagi.Verbose(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}
	defer googleService.Close()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	bridgeStream := make(chan []byte)
	audioStream := goEagi.StreamAudio(ctx)
	errCh := googleService.StartStreaming(ctx, bridgeStream)
	googleResponseCh := googleService.SpeechToTextResponse(ctx)

	// Forward audio chunks from the EAGI audio stream to the recognizer.
	go func(ctx context.Context, eagi *goEagi.Eagi) {
		for {
			select {
			case <-ctx.Done():
				return
			case audio := <-audioStream:
				if audio.Error != nil {
					eagi.Verbose(fmt.Sprintf("audio streaming: G error: %v", audio.Error))
					cancel()
					return
				}
				// bridgeStream is unbuffered: also watch ctx so this goroutine
				// does not block forever once the consumer has stopped.
				select {
				case <-ctx.Done():
					return
				case bridgeStream <- audio.Stream:
				}
			}
		}
	}(ctx, eagi)

	for {
		select {
		case <-ctx.Done():
			return
		case err := <-errCh:
			eagi.Verbose(fmt.Sprintf("Google speech to text response: G error: %v", err))
			cancel()
			return
		case response := <-googleResponseCh:
			if response.Error != nil {
				eagi.Verbose(fmt.Sprintf("Google speech to text response: G error: %v", response.Error))
				cancel()
				return
			}
			// Skip results that carry no alternatives rather than panicking
			// on the index below.
			if len(response.Result.Alternatives) == 0 {
				continue
			}
			transcription := response.Result.Alternatives[0].Transcript
			isFinal := response.Result.IsFinal
			eagi.Verbose(fmt.Sprintf("IsFinal: %v, Transcription: %v\n", isFinal, transcription))
		}
	}
}
- Prerequisite - install the Speech SDK
- Carefully read the Speech SDK documentation and verify the platform requirements to ensure compatibility with your Asterisk server.
- If it is not possible to install the Speech SDK on your Asterisk server, you can install it on a different machine and stream the audio from your Asterisk server to the Speech SDK.
- For Azure Speech to Text, you need to enable the "CGO_ENABLED" flag and build the project with the "azure" build tag, as shown below:
CGO_ENABLED=1 go build -tags azure main.go
package main
import (
"context"
"fmt"
"os"
"github.com/andrewyang17/goEagi"
)
// main streams the caller's audio to Azure Speech to Text and logs every
// informational message and transcription it receives through eagi.Verbose.
func main() {
	eagi, err := goEagi.New()
	if err != nil {
		// eagi cannot be relied on when New fails, so report on stdout directly.
		fmt.Fprintf(os.Stdout, "error: %v", err)
		os.Exit(1)
	}

	azureService, err := goEagi.NewAzureService("<subscriptionKey>", "serviceRegion", "", []string{"...<language_code>"})
	if err != nil {
		eagi.Verbose(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}
	defer azureService.Close()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	bridgeStream := make(chan []byte)
	audioStream := goEagi.StreamAudio(ctx)
	errCh := azureService.StartStreaming(ctx, bridgeStream)
	azureResponseCh := azureService.SpeechToTextResponse(ctx)

	// Forward audio chunks from the EAGI audio stream to the recognizer.
	go func(ctx context.Context, eagi *goEagi.Eagi) {
		for {
			select {
			case <-ctx.Done():
				return
			case audio := <-audioStream:
				if audio.Error != nil {
					eagi.Verbose(fmt.Sprintf("audio streaming: G error: %v", audio.Error))
					cancel()
					return
				}
				// bridgeStream is unbuffered: also watch ctx so this goroutine
				// does not block forever once the consumer has stopped.
				select {
				case <-ctx.Done():
					return
				case bridgeStream <- audio.Stream:
				}
			}
		}
	}(ctx, eagi)

	for {
		select {
		case <-ctx.Done():
			return
		case err := <-errCh:
			eagi.Verbose(fmt.Sprintf("Azure speech to text response: G error: %v", err))
			cancel()
			return
		case response := <-azureResponseCh:
			if response.Error != nil {
				eagi.Verbose(fmt.Sprintf("Azure speech to text response: G error: %v", response.Error))
				cancel()
				return
			}
			// Informational messages are logged on their own and carry no
			// transcription to report.
			if response.Info != "" {
				eagi.Verbose(fmt.Sprintf("Info: %v", response.Info))
				continue
			}
			eagi.Verbose(fmt.Sprintf("IsFinal: %v, Transcription: %v\n", response.IsFinal, response.Transcription))
		}
	}
}
- Prerequisite - run the Vosk server
docker run -d -p 2700:2700 alphacep/kaldi-en:latest
package main
import (
"context"
"fmt"
"os"
"github.com/andrewyang17/goEagi"
)
// main streams the caller's audio to a Vosk server and logs every
// transcription it receives through eagi.Verbose.
func main() {
	eagi, err := goEagi.New()
	if err != nil {
		// eagi cannot be relied on when New fails, so report on stdout directly.
		fmt.Fprintf(os.Stdout, "error: %v", err)
		os.Exit(1)
	}

	// Use phraseList (the last argument) to list the valid phrases/words.
	// Notes:
	//   * if you use a phrase list, Vosk will only detect these words and
	//     ignore any other word
	//   * some Vosk models don't support phrase lists (observed by the
	//     original author with the Spanish model)
	//   * to disable the phrase list, leave phraseList empty
	voskService, err := goEagi.NewVoskService("<voskHost>", "<voskPort>", nil)
	if err != nil {
		eagi.Verbose(fmt.Sprintf("error: %v", err))
		os.Exit(1)
	}
	defer voskService.Close()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	bridgeStream := make(chan []byte)
	audioStream := goEagi.StreamAudio(ctx)
	errCh := voskService.StartStreaming(ctx, bridgeStream)
	voskResponseCh := voskService.SpeechToTextResponse(ctx)

	// Forward audio chunks from the EAGI audio stream to the recognizer.
	go func(ctx context.Context, eagi *goEagi.Eagi) {
		// Only the sending side closes the channel: closing it from main
		// while this goroutine may still be sending could panic with
		// "send on closed channel".
		defer close(bridgeStream)
		for {
			select {
			case <-ctx.Done():
				return
			case audio := <-audioStream:
				if audio.Error != nil {
					eagi.Verbose(fmt.Sprintf("audio streaming: G error: %v", audio.Error))
					cancel()
					return
				}
				// bridgeStream is unbuffered: also watch ctx so this goroutine
				// does not block forever once the consumer has stopped.
				select {
				case <-ctx.Done():
					return
				case bridgeStream <- audio.Stream:
				}
			}
		}
	}(ctx, eagi)

	for {
		select {
		case <-ctx.Done():
			return
		case err := <-errCh:
			eagi.Verbose(fmt.Sprintf("Vosk speech to text response: G error: %v", err))
			cancel()
			return
		case response := <-voskResponseCh:
			// Partial data arrives in response.Partial; once the full text has
			// been recognized it arrives in response.Text.
			eagi.Verbose(fmt.Sprintf("Transcription: %v\n", response.Text))
		}
	}
}
Made with contrib.rocks
Contributions are always welcome!
MIT License, see LICENSE.
Andrew Yang - [email protected]
Project Link: https://github.com/andrewyang17/goEagi
We would like to express our gratitude to the authors and contributors of the following open-source libraries, which were used in this project:
- cloud.google.com/go/speech: Developed by Google
- github.com/Microsoft/cognitive-services-speech-sdk-go: Developed by Microsoft
- github.com/cryptix/wav: Developed by Henry Cryptix
- github.com/zaf/agi: Developed by Lefteris Zafiris
- github.com/gorilla/websocket: Developed by Gorilla