Skip to content
This repository has been archived by the owner on Sep 8, 2024. It is now read-only.

Issues-2516 - Resolved IBM Cloud integration issue #2548

Merged
merged 4 commits into from
Apr 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 73 additions & 3 deletions mycroft/stt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,14 +108,84 @@ def execute(self, audio, language=None):
return self.recognizer.recognize_wit(audio, self.token)


class IBMSTT(TokenSTT):
    """IBM Speech to Text.

    Enables IBM Speech to Text access using API key. To use IBM as a
    service provider, it must be configured locally in your config file. An
    IBM Cloud account with Speech to Text enabled is required (limited free
    tier may be available). STT config should match the following format:

        "stt": {
            "module": "ibm",
            "ibm": {
                "credential": {
                    "token": "YOUR_API_KEY"
                },
                "url": "URL_FROM_SERVICE"
            }
        }
    """

    # Language codes this class accepts; each is combined with an audio
    # model name to form the 'model' request parameter.
    SUPPORTED_LANGUAGES = (
        'ar-AR', 'pt-BR', 'zh-CN', 'nl-NL', 'en-GB', 'en-US', 'fr-FR',
        'de-DE', 'it-IT', 'ja-JP', 'ko-KR', 'es-AR', 'es-ES', 'es-CL',
        'es-CO', 'es-MX', 'es-PE'
    )

    def __init__(self):
        super(IBMSTT, self).__init__()

    def execute(self, audio, language=None):
        """Transcribe audio by posting it to the configured IBM endpoint.

        Arguments:
            audio: object exposing ``sample_rate`` and ``get_flac_data()``.
            language (str): language code for this request; when given it
                replaces ``self.lang``.

        Returns:
            str: every transcript found in the response, joined by newlines.

        Raises:
            ValueError: if the API key or URL is not configured, or the
                language is not in SUPPORTED_LANGUAGES.
            Exception: if the request fails, the service reports an error,
                or the response holds no usable results.
        """
        # NOTE(review): self.token and self.config are presumably populated
        # by the TokenSTT base class from the "ibm" config section - confirm.
        if not self.token:
            raise ValueError('API key (token) for IBM Cloud is not defined.')

        url_base = self.config.get('url', '')
        if not url_base:
            raise ValueError('URL for IBM Cloud is not defined.')
        # Tolerate a trailing slash in the configured URL so the request
        # path does not end up as '//v1/recognize'.
        url = url_base.rstrip('/') + '/v1/recognize'

        self.lang = language or self.lang
        if self.lang not in self.SUPPORTED_LANGUAGES:
            raise ValueError(
                'Unsupported language "{}" for IBM STT.'.format(self.lang))

        # Audio sampled below 16 kHz uses the narrowband model, except for
        # 'ar-AR' (presumably no narrowband model is offered for it).
        audio_model = 'BroadbandModel'
        if audio.sample_rate < 16000 and self.lang != 'ar-AR':
            audio_model = 'NarrowbandModel'

        params = {
            'model': '{}_{}'.format(self.lang, audio_model),
            'profanity_filter': 'false'
        }
        headers = {
            'Content-Type': 'audio/x-flac',
            'X-Watson-Learning-Opt-Out': 'true'
        }

        response = post(url, auth=('apikey', self.token), headers=headers,
                        data=audio.get_flac_data(), params=params)

        if response.status_code == 401:  # Unauthorized
            raise Exception('Invalid API key for IBM Cloud.')
        if response.status_code != 200:
            raise Exception(
                'Request to IBM Cloud failed. Code: {} Body: {}'.format(
                    response.status_code, response.text))

        result = json.loads(response.text)
        # A 200 reply carrying an error code is still a failure; report it
        # instead of silently returning None.
        if result.get('error_code') is not None:
            raise Exception(
                'Request to IBM Cloud failed. Code: {} Body: {}'.format(
                    result['error_code'], response.text))

        utterances = result.get('results')
        if not utterances:
            raise Exception(
                'Transcription failed. Invalid or empty results.')

        transcription = []
        for utterance in utterances:
            if 'alternatives' not in utterance:
                raise Exception(
                    'Transcription failed. Invalid or empty results.')
            transcription.extend(
                hypothesis['transcript']
                for hypothesis in utterance['alternatives']
                if 'transcript' in hypothesis)
        return '\n'.join(transcription)


class YandexSTT(STT):
Expand Down
57 changes: 50 additions & 7 deletions test/unittests/stt/test_stt.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,18 @@ def test_factory(self, mock_get):
'google': {'credential': {'token': 'FOOBAR'}},
'bing': {'credential': {'token': 'FOOBAR'}},
'houndify': {'credential': {'client_id': 'FOO',
"client_key": "BAR"}},
"client_key": 'BAR'}},
'google_cloud': {
'credential': {
'json': {}
}
},
'ibm': {'credential': {'token': 'FOOBAR'}},
'ibm': {
'credential': {
'token': 'FOOBAR'
},
'url': 'https://test.com/'
},
'kaldi': {'uri': 'https://test.com'},
'mycroft': {'uri': 'https://test.com'}
},
Expand Down Expand Up @@ -164,26 +169,64 @@ def test_google_cloud_stt(self, mock_get):
stt.execute(audio)
self.assertTrue(stt.recognizer.recognize_google_cloud.called)

@patch('mycroft.stt.post')
@patch.object(Configuration, 'get')
def test_ibm_stt(self, mock_get, mock_post):
    """IBMSTT posts the audio to the recognize endpoint and returns text.

    The HTTP call is replaced by a mock that answers with one successful
    recognition result, so no network access takes place.
    """
    import json

    config = base_config()
    config.merge(
        {
            'stt': {
                'module': 'ibm',
                'ibm': {
                    'credential': {
                        'token': 'FOOBAR'
                    },
                    'url': 'https://test.com'
                },
            },
            'lang': 'en-US'
        }
    )
    mock_get.return_value = config

    # Canned successful reply carrying a single transcript.
    requests_object = MagicMock()
    requests_object.status_code = 200
    requests_object.text = json.dumps({
        'results': [
            {
                'alternatives': [
                    {
                        'confidence': 0.96,
                        'transcript': 'sample response'
                    }
                ],
                'final': True
            }
        ],
        'result_index': 0
    })
    mock_post.return_value = requests_object

    # 16 kHz audio, so the broadband model is expected in the request.
    audio = MagicMock()
    audio.sample_rate = 16000

    stt = mycroft.stt.IBMSTT()
    transcript = stt.execute(audio)

    # The single transcript from the canned reply must be returned as-is.
    self.assertEqual(transcript, 'sample response')

    expected_url = 'https://test.com/v1/recognize'
    mock_post.assert_called_with(expected_url,
                                 auth=('apikey', 'FOOBAR'),
                                 headers={
                                     'Content-Type': 'audio/x-flac',
                                     'X-Watson-Learning-Opt-Out': 'true'
                                 },
                                 data=audio.get_flac_data(),
                                 params={
                                     'model': 'en-US_BroadbandModel',
                                     'profanity_filter': 'false'
                                 })

@patch.object(Configuration, 'get')
def test_wit_stt(self, mock_get):
Expand Down