-
Notifications
You must be signed in to change notification settings - Fork 2
/
vosk.go
138 lines (114 loc) · 2.97 KB
/
vosk.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
// Package goEagi (vosk.go) provides a simplified interface
// for calling Vosk Server's speech-to-text service.
// It gives callers the flexibility to
// set their desired configuration.
package goEagi
import (
"context"
"encoding/json"
"fmt"
"net/url"
"github.com/gorilla/websocket"
)
// VoskResult is the response from Vosk Speech Recognizer.
// SpeechToTextResponse decodes each websocket message into this type.
// The fields carry no JSON tags, so encoding/json matches them to the
// message keys by field name, case-insensitively.
type VoskResult struct {
// Result holds the per-word entries of a response.
// NOTE(review): Conf, Start and End look like a confidence score and
// word timings; confirm their meaning and units against the Vosk docs.
Result []struct {
Conf float64
End float64
Start float64
Word string
}
// Text and Partial are the transcript fields. SpeechToTextResponse only
// forwards a result when at least one of them is non-empty.
Text string
Partial string
}
// VoskService is the client for Vosk Speech Recognizer.
// It doubles as the payload of the initial configuration message:
// PhraseList and Words are serialized under the JSON keys below, while
// Client and errorStream are excluded from the encoding.
type VoskService struct {
// PhraseList is sent to the server as "phrase_list".
PhraseList []string `json:"phrase_list"`
// Words is sent to the server as "words".
// NOTE(review): presumably toggles per-word details in results; confirm.
Words bool `json:"words"`
// Client is the websocket connection used for all reads and writes.
Client *websocket.Conn `json:"-"`
// errorStream is created by StartStreaming and is also written to by
// SpeechToTextResponse. It is unexported, so encoding/json ignores it
// regardless of the tag.
errorStream chan error `json:"-"`
}
// voskConfig is the envelope of the configuration message for Vosk
// Speech Recognizer. It nests the service settings under the "config"
// key; NewVoskService marshals it and sends it as a text message right
// after the websocket connection is opened.
type voskConfig struct {
Config VoskService `json:"config"`
}
// NewVoskService creates a new VoskService.
//
// It opens a websocket connection to ws://host:port and sends the initial
// configuration message, which carries phraseList, as a text message.
//
// An error is returned when the connection cannot be opened, when the
// configuration cannot be encoded, or when the configuration message cannot
// be written. In the latter two cases the freshly opened connection is
// closed before returning, so a failed call does not leak a socket.
func NewVoskService(host string, port string, phraseList []string) (*VoskService, error) {
	h := fmt.Sprintf("%s:%s", host, port)
	u := url.URL{Scheme: "ws", Host: h, Path: ""}

	// Opening websocket connection
	c, _, err := websocket.DefaultDialer.Dial(u.String(), nil)
	if err != nil {
		return nil, err
	}

	v := VoskService{
		PhraseList: phraseList,
		Client:     c,
	}

	configJSON, err := json.Marshal(voskConfig{Config: v})
	if err != nil {
		// Best-effort cleanup: the encoding error is the one worth reporting.
		_ = c.Close()
		return nil, fmt.Errorf("encoding vosk config: %w", err)
	}

	if err := c.WriteMessage(websocket.TextMessage, configJSON); err != nil {
		// Best-effort cleanup: the write error is the one worth reporting.
		_ = c.Close()
		return nil, err
	}

	return &v, nil
}
// StartStreaming starts the streaming to Vosk speech to text service.
// It takes a reading channel of audio stream and sends each chunk as a
// websocket binary message to Vosk service.
//
// The returned channel delivers at most one error (a failed write, wrapped
// as "streaming error: ...") and is closed when the streaming goroutine
// exits. The goroutine exits when ctx is cancelled, in which case the EOF
// message is sent first, or when a write fails. In both cases the websocket
// connection is closed on the way out.
//
// If stream is closed by the producer, the goroutine stops reading from it
// and keeps waiting for ctx to be cancelled.
func (v *VoskService) StartStreaming(ctx context.Context, stream <-chan []byte) <-chan error {
	// Keep a local handle so the goroutine always closes and writes to the
	// channel it created, even if the field is reassigned by a later call.
	errs := make(chan error)
	v.errorStream = errs

	go func() {
		defer close(errs)
		defer v.Client.Close()

		for {
			select {
			case <-ctx.Done():
				// Best-effort EOF: nobody may be listening for errors anymore.
				_ = v.Close()
				return

			case buf, ok := <-stream:
				if !ok {
					// A closed channel is always ready and yields nil, which
					// would spin this loop writing empty messages. A nil
					// channel is never ready, so this disables the case.
					stream = nil
					continue
				}

				if err := v.Client.WriteMessage(websocket.BinaryMessage, buf); err != nil {
					// Do not block forever if the caller stopped reading errors.
					select {
					case errs <- fmt.Errorf("streaming error: %w", err):
					case <-ctx.Done():
					}
					return
				}
			}
		}
	}()

	return errs
}
// Close signals the end of the audio stream to the Vosk service by sending
// the {"eof" : 1} text message over the websocket. It only writes that
// message; the underlying connection is not closed here. The write error,
// if any, is returned to the caller.
func (v *VoskService) Close() error {
	eof := []byte(`{"eof" : 1}`)
	return v.Client.WriteMessage(websocket.TextMessage, eof)
}
// SpeechToTextResponse sends the transcription response from Vosk's SpeechToText.
//
// It starts a goroutine that reads websocket messages, decodes each one into
// a VoskResult and forwards it on the returned channel when Text or Partial
// is non-empty. The returned channel is closed when the goroutine exits:
// when ctx is cancelled, when reading from the websocket fails, or when a
// message cannot be decoded.
//
// Read and decode errors are reported on the error channel created by
// StartStreaming, so StartStreaming should be called first. Errors observed
// after ctx has been cancelled are dropped, because by then the connection
// has typically been closed on purpose.
func (v *VoskService) SpeechToTextResponse(ctx context.Context) <-chan VoskResult {
	voskResultStream := make(chan VoskResult)

	// report forwards err on the shared error channel without ever blocking
	// past cancellation.
	report := func(err error) {
		// The error channel is closed by the StartStreaming goroutine when it
		// exits, and sending on a closed channel panics. The only operation
		// below that can panic is that send, so recovering here turns the
		// race into a dropped error instead of crashing the program.
		defer func() { _ = recover() }()

		if ctx.Err() != nil {
			return
		}
		select {
		case v.errorStream <- err:
		case <-ctx.Done():
		}
	}

	go func() {
		defer close(voskResultStream)

		for {
			if ctx.Err() != nil {
				return
			}

			_, msg, err := v.Client.ReadMessage()
			if err != nil {
				report(err)
				return
			}

			var m VoskResult
			if err := json.Unmarshal(msg, &m); err != nil {
				report(err)
				return
			}

			// Messages without any transcript text are not forwarded.
			if m.Text == "" && m.Partial == "" {
				continue
			}

			// Do not block forever if the consumer stopped reading results.
			select {
			case voskResultStream <- m:
			case <-ctx.Done():
				return
			}
		}
	}()

	return voskResultStream
}