-
Notifications
You must be signed in to change notification settings - Fork 1
/
DeepSpeechTranscriber.cs
168 lines (133 loc) · 5.68 KB
/
DeepSpeechTranscriber.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Management;
using NAudio.Wave;
using DeepSpeechClient.Interfaces;
using DeepSpeechClient.Models;
namespace DeepSpeechLib
{
/// <summary>
/// Captures microphone audio into a temporary WAV file (or copies an existing
/// audio file into that location) and transcribes it with a DeepSpeech model
/// and an external KenLM scorer.
/// </summary>
/// <remarks>
/// Largely adapted from https://deepspeech.readthedocs.io/en/v0.7.3/DotNet-Examples.html
/// </remarks>
public class DeepSpeechTranscriber : IDisposable
{
    const String DEFAULT_MODEL = "models/am/techiaith_bangor_20.07.pbmm";
    const String DEFAULT_KENLM_SCORER = "models/lm/techiaith_bangor_20.07.scorer";

    // Every recording / imported file is staged at this fixed location in "My Documents".
    private readonly String tmpWavFilePath = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments), "deepspeech.tmp.wav");

    /// <summary>Path of the acoustic model passed to the DeepSpeech client.</summary>
    public String model { get; private set; }

    /// <summary>Path of the scorer passed to <c>EnableExternalScorer</c>.</summary>
    public String kenlm_scorer { get; private set; }

    private IDeepSpeech _sttClient;
    private WaveInEvent _waveSource;

    // The writer is per-instance (it used to be static, so one instance could
    // dispose another instance's writer) and is guarded by a lock because it is
    // touched both by StartRecording/StopRecording and by the DataAvailable handler.
    private readonly object _waveFileLock = new object();
    private WaveFileWriter _waveFile;

    /// <summary>
    /// Creates the DeepSpeech client, enables the external scorer and prepares
    /// a 16 kHz, single-channel capture source.
    /// </summary>
    /// <param name="model">Acoustic model path; null or empty selects the default model.</param>
    /// <param name="kenlm_scorer">Scorer path; null or empty selects the default scorer.</param>
    /// <exception cref="Exception">
    /// Thrown when the DeepSpeech client cannot be created or the scorer cannot be
    /// enabled. The original failure is available through <c>InnerException</c>.
    /// </exception>
    public DeepSpeechTranscriber(String model=DEFAULT_MODEL,
                                 String kenlm_scorer= DEFAULT_KENLM_SCORER)
    {
        this.model = String.IsNullOrEmpty(model) ? DEFAULT_MODEL : model;
        this.kenlm_scorer = String.IsNullOrEmpty(kenlm_scorer) ? DEFAULT_KENLM_SCORER : kenlm_scorer;

        try
        {
            _sttClient = new DeepSpeechClient.DeepSpeech(this.model);
            _sttClient.EnableExternalScorer(this.kenlm_scorer);
        }
        catch (Exception exc)
        {
            Console.Out.WriteLine(exc.Message);
            Console.Out.WriteLine(exc.StackTrace);

            // If the client was created but enabling the scorer failed, release it
            // instead of leaving it attached to a half-constructed object.
            _sttClient?.Dispose();
            _sttClient = null;

            Tuple<string, bool> avx = isAvxSupported();
            if (avx.Item2 == false)
            {
                throw new Exception(
                    "Methwyd creu'r peiriant DeepSpeech oherwydd ddiffyg yn CPU ("
                    + avx.Item1 + ") y cyfrifiadur.\n\n"
                    + "Mae angen cyfrifiadur sydd a fath diweddar o CPU (fel Intel Core i3/5/7/9) ac sy'n cynorthwyo AVX.",
                    exc);
            }

            throw new Exception("Methwyd creu'r peiriant DeepSpeech am rheswm anhysbys.", exc);
        }

        _waveSource = new WaveInEvent();
        _waveSource.WaveFormat = new WaveFormat(16000, 1);
        _waveSource.DataAvailable += new EventHandler<WaveInEventArgs>(onWaveSource_DataAvailable);
    }

    /// <summary>
    /// Opens the temporary WAV file for writing and starts capturing audio into it.
    /// </summary>
    public void StartRecording()
    {
        var writer = new WaveFileWriter(tmpWavFilePath, _waveSource.WaveFormat);
        lock (_waveFileLock)
        {
            _waveFile = writer;
        }

        try
        {
            _waveSource.StartRecording();
        }
        catch
        {
            // Capture could not start: close the writer so the temporary file is
            // not left open, then let the caller see the original failure.
            lock (_waveFileLock)
            {
                _waveFile = null;
            }
            writer.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Copies an existing audio file over the temporary WAV file so that
    /// <see cref="Transcribe"/> processes it.
    /// </summary>
    /// <param name="audioFilePath">Path of the file to copy.</param>
    public void AddRecording(String audioFilePath)
    {
        // overwrite: true - the temporary file normally already exists from an
        // earlier recording or import, and the two-argument overload throws then.
        File.Copy(audioFilePath, tmpWavFilePath, true);
    }

    /// <summary>
    /// Stops capturing and closes the temporary WAV file. Does nothing to the
    /// file if no recording was started.
    /// </summary>
    public void StopRecording()
    {
        _waveSource.StopRecording();

        WaveFileWriter writer;
        lock (_waveFileLock)
        {
            // Detach the writer first so a late DataAvailable callback can never
            // write to a disposed writer.
            writer = _waveFile;
            _waveFile = null;
        }
        writer?.Dispose();
    }

    /// <summary>
    /// Runs speech-to-text over the temporary WAV file.
    /// </summary>
    /// <returns>
    /// One formatted description per candidate transcript (see
    /// <see cref="MetadataToString"/>), ordered from highest to lowest confidence.
    /// </returns>
    public List<String> Transcribe()
    {
        List<String> result = new List<string>();

        // The whole file is read as-is, so any header bytes in the file are part
        // of the buffer handed to the recognizer.
        var waveBuffer = new WaveBuffer(File.ReadAllBytes(tmpWavFilePath));

        // The reader's properties are not used; it is still opened, as in the original.
        using (var waveInfo = new WaveFileReader(tmpWavFilePath))
        {
            // NOTE(review): 16000 looks like a sample rate, but in the DeepSpeech 0.7.x
            // .NET client the third argument appears to be the maximum number of
            // candidate transcripts (aNumResults) - confirm against the client version in use.
            Metadata metaResult = _sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer, Convert.ToUInt32(waveBuffer.MaxSize / 2), 16000);

            // OrderByDescending returns a new sequence; the original discarded it,
            // so the candidates were never actually sorted.
            foreach (CandidateTranscript ct in metaResult.Transcripts.OrderByDescending(x => x.Confidence))
            {
                result.Add(MetadataToString(ct));
            }
        }

        waveBuffer.Clear();
        return result;
    }

    /// <summary>
    /// Releases the capture device, any open WAV writer and the DeepSpeech client.
    /// </summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases the disposable objects owned by this instance.
    /// </summary>
    /// <param name="disposing">True when called from <see cref="Dispose()"/>.</param>
    protected virtual void Dispose(bool disposing)
    {
        if (!disposing)
        {
            return;
        }

        if (_waveSource != null)
        {
            _waveSource.DataAvailable -= onWaveSource_DataAvailable;
            _waveSource.Dispose();
            _waveSource = null;
        }

        WaveFileWriter writer;
        lock (_waveFileLock)
        {
            writer = _waveFile;
            _waveFile = null;
        }
        writer?.Dispose();

        _sttClient?.Dispose();
        _sttClient = null;
    }

    /// <summary>
    /// Formats one candidate transcript as text: the recognized text, its
    /// confidence, the token count and one line per token.
    /// </summary>
    /// <param name="transcript">Candidate to format; a null transcript or null token list is formatted as empty.</param>
    private static string MetadataToString(CandidateTranscript transcript)
    {
        var nl = Environment.NewLine;

        // string.Join throws on a null sequence, so substitute empty sequences
        // when the transcript or its tokens are missing.
        IEnumerable<string> tokenTexts =
            transcript?.Tokens?.Select(x => x.Text) ?? Enumerable.Empty<string>();
        IEnumerable<string> tokenLines =
            transcript?.Tokens?.Select(x => $"Timestep : {x.Timestep} TimeOffset: {x.StartTime} Char: {x.Text}")
            ?? Enumerable.Empty<string>();

        string retval =
            $"Recognized text: {string.Join("", tokenTexts)} {nl}"
            + $"Confidence: {transcript?.Confidence} {nl}"
            + $"Item count: {transcript?.Tokens?.Length} {nl}"
            + string.Join(nl, tokenLines)
            + Environment.NewLine;
        return retval;
    }

    /// <summary>
    /// Appends each captured buffer to the open WAV writer, if there is one.
    /// </summary>
    private void onWaveSource_DataAvailable(object sender, WaveInEventArgs e)
    {
        lock (_waveFileLock)
        {
            if (_waveFile != null)
            {
                _waveFile.Write(e.Buffer, 0, e.BytesRecorded);
                _waveFile.Flush();
            }
        }
    }

    /// <summary>
    /// Guesses AVX support from the name of the first processor reported by WMI.
    /// </summary>
    /// <returns>
    /// The processor name (empty if none was reported) and a flag that is true
    /// only when the name contains "Core(TM)" and one of i3/i5/i7/i9 and does not
    /// contain "Celeron".
    /// </returns>
    /// <remarks>
    /// This is a name-based heuristic, not a CPU feature query: any processor whose
    /// name does not match the pattern is reported as lacking AVX.
    /// </remarks>
    private static Tuple<string, bool> isAvxSupported()
    {
        String cpu_name = String.Empty;

        using (ManagementObjectSearcher mso = new ManagementObjectSearcher("select * from Win32_Processor"))
        using (ManagementObjectCollection processors = mso.Get())
        {
            foreach (ManagementObject mo in processors)
            {
                using (mo)
                {
                    // Only the first processor is inspected; a missing Name becomes "".
                    cpu_name = mo["Name"]?.ToString() ?? String.Empty;
                }
                break;
            }
        }

        bool isCoreSeries =
            cpu_name.Contains("Core(TM)")
            && (cpu_name.Contains("i3")
                || cpu_name.Contains("i5")
                || cpu_name.Contains("i7")
                || cpu_name.Contains("i9"));

        bool hasAvx = !cpu_name.Contains("Celeron") && isCoreSeries;

        return new Tuple<string, bool>(cpu_name, hasAvx);
    }
}
}