-
Notifications
You must be signed in to change notification settings - Fork 2
/
audio.go
188 lines (157 loc) · 4.48 KB
/
audio.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
// Package goEagi provides helpers for working with Asterisk audio.
// This file, audio.go, covers streaming audio from file descriptor 3,
// computing the amplitude of audio samples, and
// generating wav files from raw audio bytes.
package goEagi
import (
"bytes"
"context"
"encoding/binary"
"errors"
"fmt"
"math"
"os"
"path/filepath"
"syscall"
"github.com/cryptix/wav"
)
// Audio format parameters and the input path shared by the functions in this file.
const (
// audioSampleRate is the sample rate written into generated wav files.
audioSampleRate = 8000
// audioBitsPerSample is the number of bits in one audio sample.
audioBitsPerSample = 16
// audioBytesPerSample is the size of one sample in bytes, derived from audioBitsPerSample.
audioBytesPerSample = audioBitsPerSample / 8
// audioChannel is the channel count written into generated wav files.
audioChannel = 1
// defaultFileDescriptorPath is the path StreamAudio opens to read audio (file descriptor 3).
defaultFileDescriptorPath = "/dev/fd/3"
)
// AudioResult is the value StreamAudio delivers on its channel.
// Each result carries either an Error or a Stream of audio bytes.
type AudioResult struct {
// Error is set when opening or reading file descriptor 3 failed.
Error error
// Stream holds the bytes obtained from a single read of the descriptor.
Stream []byte
}
// StreamAudio launches a new goroutine that reads audio from file descriptor 3
// (opened through defaultFileDescriptorPath) and delivers it on the returned
// channel.
//
// Each AudioResult carries either a chunk of audio bytes in Stream or a
// non-nil Error. Every chunk is a fresh copy of the data read, so receivers
// may keep it after receiving the next result.
//
// The channel is closed when ctx is cancelled, after an open or read error has
// been delivered, or when the descriptor reports end of file. Cancellation is
// observed between reads and while waiting for the receiver; a read that is
// already blocked is not interrupted.
func StreamAudio(ctx context.Context) <-chan AudioResult {
	audioResultStream := make(chan AudioResult)

	// send delivers r unless ctx is cancelled first, so the goroutine cannot
	// block forever on a receiver that has stopped listening.
	send := func(r AudioResult) bool {
		select {
		case audioResultStream <- r:
			return true
		case <-ctx.Done():
			return false
		}
	}

	go func() {
		defer close(audioResultStream)

		fd, err := syscall.Open(defaultFileDescriptorPath, syscall.O_RDONLY, 0755)
		if err != nil {
			send(AudioResult{Error: fmt.Errorf("could not open fd3: %v\n", err)})
			return
		}
		// Release the descriptor opened above once streaming stops.
		defer syscall.Close(fd)

		buf := make([]byte, 1024)
		for {
			select {
			case <-ctx.Done():
				return
			default:
			}

			n, err := syscall.Read(fd, buf)
			if err != nil {
				send(AudioResult{Error: fmt.Errorf("failed to read fd3: %v\n", err)})
				return
			}
			if n == 0 {
				// A zero-byte read without an error is end of file; stop
				// instead of spinning on further empty reads.
				return
			}

			// buf is overwritten by the next read, so hand out a copy.
			chunk := make([]byte, n)
			copy(chunk, buf[:n])
			if !send(AudioResult{Stream: chunk}) {
				return
			}
		}
	}()

	return audioResultStream
}
// ComputeAmplitude reports the amplitude of sample, a slice of raw audio bytes.
//
// The bytes are decoded with parseRawData, their root mean square is converted
// to decibels relative to maxPossibleAmplitude, and 90 is added to the result.
// An error from decoding is returned unchanged with a zero amplitude.
func ComputeAmplitude(sample []byte) (float64, error) {
	frames, err := parseRawData(sample)
	if err != nil {
		return 0, err
	}

	level := ratioToDb(rms(frames), maxPossibleAmplitude())
	return level + 90, nil
}
// GenerateAudio writes a sample slice of bytes into an audio file.
// It returns a location path of an audio which passed in the function parameters.
// Please note that only wav extension is supported.
func GenerateAudio(sample []byte, audioDirectory string, audioName string) (string, error) {
if fileExtension := filepath.Ext(audioName); fileExtension != ".wav" {
return "", errors.New("audio name does not contain .wav extension")
}
if _, err := os.Stat(audioDirectory); os.IsNotExist(err) {
if err := os.MkdirAll(audioDirectory, os.ModePerm); err != nil {
return "", err
}
}
audioPath := audioDirectory + audioName
file, err := os.Create(audioPath)
if err != nil {
return "", fmt.Errorf("failed to create audio path: %v\n", err)
}
defer file.Close()
meta := wav.File{
NumberOfSamples: uint32(len(sample)),
SampleRate: audioSampleRate,
SignificantBits: audioBitsPerSample,
Channels: audioChannel,
}
writer, err := meta.NewWriter(file)
if err != nil {
return "", err
}
defer writer.Close()
bytesSampleSize := int(meta.SignificantBits) / 8
for i := 0; i < len(sample); i += bytesSampleSize {
if err := writer.WriteSample(sample[i : i+bytesSampleSize]); err != nil {
return "", fmt.Errorf("failed to generate audio: %v\n", err)
}
}
return audioPath, nil
}
// scaleFrame divides unscaled by math.MaxInt16 and returns the quotient as a
// float64. It is used in parseRawData.
func scaleFrame(unscaled int) float64 {
	const fullScale = float64(math.MaxInt16)
	return float64(unscaled) / fullScale
}
// bits16ToInt decodes b, which must be exactly two bytes long, as a signed
// little-endian 16-bit integer and returns it as an int. Any other length is
// an error. It is used in parseRawData.
func bits16ToInt(b []byte) (int, error) {
	if len(b) != 2 {
		return 0, errors.New("slice of bytes must be length of 2")
	}

	var frame int16
	err := binary.Read(bytes.NewReader(b), binary.LittleEndian, &frame)
	if err != nil {
		return 0, err
	}
	return int(frame), nil
}
// parseRawData splits rawData into frames of audioBytesPerSample bytes,
// decodes each one with bits16ToInt and returns the values scaled by
// scaleFrame. It is used in ComputeAmplitude.
//
// rawData must hold a whole number of frames. A trailing partial frame is
// reported as an error rather than sliced past the end of the data, which
// would panic or pick up stale bytes from the backing array.
func parseRawData(rawData []byte) ([]float64, error) {
	if len(rawData)%audioBytesPerSample != 0 {
		return nil, fmt.Errorf("audio data length %d is not a multiple of %d bytes", len(rawData), audioBytesPerSample)
	}

	// The frame count is known, so size the result once.
	frames := make([]float64, 0, len(rawData)/audioBytesPerSample)
	for i := 0; i < len(rawData); i += audioBytesPerSample {
		unscaledFrame, err := bits16ToInt(rawData[i : i+audioBytesPerSample])
		if err != nil {
			return nil, err
		}
		frames = append(frames, scaleFrame(unscaledFrame))
	}
	return frames, nil
}
// rms returns the root mean square of samples: the square root of the mean of
// the squared values. It is used in ComputeAmplitude.
//
// The mean is taken over len(samples), because every element is already one
// decoded frame rather than one byte. An empty slice yields 0 instead of NaN.
func rms(samples []float64) float64 {
	if len(samples) == 0 {
		return 0
	}

	var sumSquare float64
	for _, sample := range samples {
		sumSquare += sample * sample
	}
	return math.Sqrt(sumSquare / float64(len(samples)))
}
// maxPossibleAmplitude returns half of 2 raised to audioBitsPerSample.
// It is used in ComputeAmplitude as the reference for the decibel ratio.
func maxPossibleAmplitude() float64 {
	return math.Pow(2, float64(audioBitsPerSample)) / 2
}
// ratioToDb converts the ratio rms/maxAmplitude to decibels, computed as
// 20 times the base-10 logarithm of the ratio. It is used in ComputeAmplitude.
func ratioToDb(rms, maxAmplitude float64) float64 {
	const decibelFactor = 20
	return decibelFactor * math.Log10(rms/maxAmplitude)
}