MycroftAI · krisgesling · Apr 27, 2020 · Apr 24, 2020 · Apr 26, 2020 · Apr 27, 2020
diff --git a/mycroft/stt/__init__.py b/mycroft/stt/__init__.py
@@ -108,14 +108,103 @@ def execute(self, audio, language=None):
         return self.recognizer.recognize_wit(audio, self.token)
 
 
-class IBMSTT(BasicSTT):
+class IBMSTT(TokenSTT):
+    """
+        IBM Speech to Text
+        Enables IBM Speech to Text access using API key. To use IBM as a
+        service provider, it must be configured locally in your config file. An
+        IBM Cloud account with Speech to Text enabled is required (limited free
+        tier may be available). STT config should match the following format:
+
+        "stt": {
+            "module": "ibm",
+            "ibm": {
+                "credential": {
+                    "token": "YOUR_API_KEY"
+                },
+                "url": "URL_FROM_SERVICE"
+            }
+        }
+    """
     def __init__(self):
         super(IBMSTT, self).__init__()
 
     def execute(self, audio, language=None):
+        """Transcribe audio using the IBM Speech to Text REST API.
+
+        Arguments:
+            audio: audio data providing `sample_rate` and `get_flac_data()`
+            language (str): language code, defaults to the configured one
+
+        Returns:
+            str: transcripts of all returned alternatives, newline separated
+
+        Raises:
+            ValueError: missing API key or URL, or unsupported language
+            Exception: the request failed or returned no usable results
+        """
+        if not self.token:
+            raise ValueError('API key (token) for IBM Cloud is not defined.')
+
+        url_base = self.config.get('url', '')
+        if not url_base:
+            raise ValueError('URL for IBM Cloud is not defined.')
+        # Tolerate a trailing slash in the configured service URL.
+        url = url_base.rstrip('/') + '/v1/recognize'
+
         self.lang = language or self.lang
-        return self.recognizer.recognize_ibm(audio, self.username,
-                                             self.password, self.lang)
+        supported_languages = [
+            'ar-AR', 'pt-BR', 'zh-CN', 'nl-NL', 'en-GB', 'en-US', 'fr-FR',
+            'de-DE', 'it-IT', 'ja-JP', 'ko-KR', 'es-AR', 'es-ES', 'es-CL',
+            'es-CO', 'es-MX', 'es-PE'
+        ]
+        if self.lang not in supported_languages:
+            raise ValueError(
+                'Unsupported language "{}" for IBM STT.'.format(self.lang))
+
+        audio_model = 'BroadbandModel'
+        # NOTE(review): ar-AR is kept on the broadband model, presumably
+        # because IBM offers no narrowband model for it - confirm.
+        if audio.sample_rate < 16000 and self.lang != 'ar-AR':
+            audio_model = 'NarrowbandModel'
+
+        params = {
+            'model': '{}_{}'.format(self.lang, audio_model),
+            'profanity_filter': 'false'
+        }
+        headers = {
+            'Content-Type': 'audio/x-flac',
+            'X-Watson-Learning-Opt-Out': 'true'
+        }
+
+        response = post(url, auth=('apikey', self.token), headers=headers,
+                        data=audio.get_flac_data(), params=params)
+
+        if response.status_code == 401:  # Unauthorized
+            raise Exception('Invalid API key for IBM Cloud.')
+
+        result = None
+        if response.status_code == 200:
+            result = json.loads(response.text)
+        # A 200 reply can still carry an error_code; treat it as a failure
+        # instead of silently returning None.
+        if result is None or result.get('error_code') is not None:
+            raise Exception(
+                'Request to IBM Cloud failed. Code: {} Body: {}'.format(
+                    response.status_code, response.text))
+
+        if not result.get('results'):
+            raise Exception(
+                'Transcription failed. Invalid or empty results.')
+        transcription = []
+        for utterance in result['results']:
+            if 'alternatives' not in utterance:
+                raise Exception(
+                    'Transcription failed. Invalid or empty results.')
+            for hypothesis in utterance['alternatives']:
+                if 'transcript' in hypothesis:
+                    transcription.append(hypothesis['transcript'])
+        return '\n'.join(transcription)
 
 
 class YandexSTT(STT):

diff --git a/test/unittests/stt/test_stt.py b/test/unittests/stt/test_stt.py
@@ -35,13 +35,18 @@ def test_factory(self, mock_get):
                     'google': {'credential': {'token': 'FOOBAR'}},
                     'bing': {'credential': {'token': 'FOOBAR'}},
                     'houndify': {'credential': {'client_id': 'FOO',
-                                                "client_key": "BAR"}},
+                                                'client_key': 'BAR'}},
                     'google_cloud': {
                         'credential': {
                             'json': {}
                         }
                     },
-                    'ibm': {'credential': {'token': 'FOOBAR'}},
+                    'ibm': {
+                        'credential': {
+                            'token': 'FOOBAR'
+                        },
+                        'url': 'https://test.com'
+                    },
                     'kaldi': {'uri': 'https://test.com'},
                     'mycroft': {'uri': 'https://test.com'}
                 },
@@ -164,26 +169,66 @@ def test_google_cloud_stt(self, mock_get):
         stt.execute(audio)
         self.assertTrue(stt.recognizer.recognize_google_cloud.called)
 
+    @patch('mycroft.stt.post')
     @patch.object(Configuration, 'get')
-    def test_ibm_stt(self, mock_get):
-        mycroft.stt.Recognizer = MagicMock
+    def test_ibm_stt(self, mock_get, mock_post):
+        """IBM STT posts the audio to the REST API and returns the text."""
+        import json
+
         config = base_config()
         config.merge(
             {
                 'stt': {
                     'module': 'ibm',
                     'ibm': {
-                        'credential': {'username': 'FOO', 'password': 'BAR'}
+                        'credential': {
+                            'token': 'FOOBAR'
+                        },
+                        'url': 'https://test.com'
                     },
                 },
                 'lang': 'en-US'
-            })
+            }
+        )
         mock_get.return_value = config
 
+        requests_object = MagicMock()
+        requests_object.status_code = 200
+        requests_object.text = json.dumps({
+            'results': [
+                {
+                    'alternatives': [
+                        {
+                            'confidence': 0.96,
+                            'transcript': 'sample response'
+                        }
+                    ],
+                    'final': True
+                }
+            ],
+            'result_index': 0
+        })
+        mock_post.return_value = requests_object
+
         audio = MagicMock()
+        audio.sample_rate = 16000
+
         stt = mycroft.stt.IBMSTT()
-        stt.execute(audio)
-        self.assertTrue(stt.recognizer.recognize_ibm.called)
+        transcript = stt.execute(audio)
+
+        # The transcript must be extracted from the mocked JSON reply.
+        self.assertEqual(transcript, 'sample response')
+        mock_post.assert_called_with('https://test.com/v1/recognize',
+                                     auth=('apikey', 'FOOBAR'),
+                                     headers={
+                                         'Content-Type': 'audio/x-flac',
+                                         'X-Watson-Learning-Opt-Out': 'true'
+                                     },
+                                     data=audio.get_flac_data(),
+                                     params={
+                                         'model': 'en-US_BroadbandModel',
+                                         'profanity_filter': 'false'
+                                     })
 
     @patch.object(Configuration, 'get')
     def test_wit_stt(self, mock_get):