feat(stt): readd interimResults and lowLatency wss params

watson-developer-cloud · Dec 5, 2024 · 6696356 · 6696356
1 parent e73ef5a
commit 6696356
Show file tree

Hide file tree

Showing 2 changed files with 70 additions and 0 deletions.
diff --git a/.../src/main/java/com/ibm/watson/speech_to_text/v1/model/RecognizeWithWebsocketsOptions.java b/.../src/main/java/com/ibm/watson/speech_to_text/v1/model/RecognizeWithWebsocketsOptions.java
@@ -201,7 +201,9 @@ public interface Model {
   protected Boolean splitTranscriptAtPhraseEnd;
   protected Float speechDetectorSensitivity;
   protected Float backgroundAudioSuppression;
+  protected Boolean lowLatency;
   protected Float characterInsertionBias;
+  private Boolean interimResults;
   private Boolean processingMetrics;
   private Float processingMetricsInterval;
 
@@ -232,7 +234,9 @@ public static class Builder {
     private Boolean splitTranscriptAtPhraseEnd;
     private Float speechDetectorSensitivity;
     private Float backgroundAudioSuppression;
+    private Boolean lowLatency;
     private Float characterInsertionBias;
+    private Boolean interimResults;
     private Boolean processingMetrics;
     private Float processingMetricsInterval;
 
@@ -262,7 +266,9 @@ private Builder(RecognizeWithWebsocketsOptions recognizeWithWebsocketsOptions) {
       this.splitTranscriptAtPhraseEnd = recognizeWithWebsocketsOptions.splitTranscriptAtPhraseEnd;
       this.speechDetectorSensitivity = recognizeWithWebsocketsOptions.speechDetectorSensitivity;
       this.backgroundAudioSuppression = recognizeWithWebsocketsOptions.backgroundAudioSuppression;
+      this.lowLatency = recognizeWithWebsocketsOptions.lowLatency;
       this.characterInsertionBias = recognizeWithWebsocketsOptions.characterInsertionBias;
+      this.interimResults = recognizeWithWebsocketsOptions.interimResults;
       this.processingMetrics = recognizeWithWebsocketsOptions.processingMetrics;
       this.processingMetricsInterval = recognizeWithWebsocketsOptions.processingMetricsInterval;
     }
@@ -578,6 +584,17 @@ public Builder backgroundAudioSuppression(Float backgroundAudioSuppression) {
       return this;
     }
 
+    /**
+     * Set the lowLatency.
+     *
+     * @param lowLatency the lowLatency
+     * @return the RecognizeWithWebsocketsOptions builder
+     */
+    public Builder lowLatency(Boolean lowLatency) {
+      this.lowLatency = lowLatency;
+      return this;
+    }
+
     /**
      * Set the characterInsertionBias.
      *
@@ -589,6 +606,19 @@ public Builder characterInsertionBias(Float characterInsertionBias) {
       return this;
     }
 
+    /**
+     * Set the interimResults.
+     *
+     * <p>NOTE: This parameter only works for the `recognizeUsingWebSocket` method.
+     *
+     * @param interimResults the interimResults
+     * @return the RecognizeWithWebsocketsOptions builder
+     */
+    public Builder interimResults(Boolean interimResults) {
+      this.interimResults = interimResults;
+      return this;
+    }
+
     /**
      * Set the audio.
      *
@@ -655,7 +685,9 @@ protected RecognizeWithWebsocketsOptions(Builder builder) {
     splitTranscriptAtPhraseEnd = builder.splitTranscriptAtPhraseEnd;
     speechDetectorSensitivity = builder.speechDetectorSensitivity;
     backgroundAudioSuppression = builder.backgroundAudioSuppression;
+    lowLatency = builder.lowLatency;
     characterInsertionBias = builder.characterInsertionBias;
+    interimResults = builder.interimResults;
     processingMetrics = builder.processingMetrics;
     processingMetricsInterval = builder.processingMetricsInterval;
   }
@@ -1091,6 +1123,28 @@ public Float backgroundAudioSuppression() {
     return backgroundAudioSuppression;
   }
 
+  /**
+   * Gets the lowLatency.
+   *
+   * <p>If `true` for next-generation `Multimedia` and `Telephony` models that support low latency,
+   * directs the service to produce results even more quickly than it usually does. Next-generation
+   * models produce transcription results faster than previous-generation models. The `low_latency`
+   * parameter causes the models to produce results even more quickly, though the results might be
+   * less accurate when the parameter is used.
+   *
+   * <p>The parameter is not available for previous-generation `Broadband` and `Narrowband` models.
+   * It is available for most next-generation models. For a list of next-generation models that
+   * support low latency, see [Supported next-generation language
+   * models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng#models-ng-supported).
+   * For more information about the `low_latency` parameter, see [Low
+   * latency](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-interim#low-latency).
+   *
+   * @return the lowLatency
+   */
+  public Boolean lowLatency() {
+    return lowLatency;
+  }
+
   /**
    * Gets the characterInsertionBias.
    *
@@ -1122,6 +1176,21 @@ public Float characterInsertionBias() {
     return characterInsertionBias;
   }
 
+  /**
+   * Gets the interimResults.
+   *
+   * <p>If `true`, the service returns interim results as a stream of `SpeechRecognitionResults`
+   * objects. By default, the service returns a single `SpeechRecognitionResults` object with final
+   * results only.
+   *
+   * <p>NOTE: This parameter only works for the `recognizeUsingWebSocket` method.
+   *
+   * @return the interimResults value that was copied from the builder
+   */
+  public Boolean interimResults() {
+    return interimResults;
+  }
+
   /**
    * Gets the processingMetrics.
    *

diff --git a/speech-to-text/src/test/java/com/ibm/watson/speech_to_text/v1/SpeechToTextIT.java b/speech-to-text/src/test/java/com/ibm/watson/speech_to_text/v1/SpeechToTextIT.java
@@ -347,6 +347,7 @@ public void testRecognizeWebSocket() throws FileNotFoundException, InterruptedEx
             .wordAlternativesThreshold(0.5f)
             .model(EN_BROADBAND16K)
             .contentType(HttpMediaType.AUDIO_WAV)
+            .interimResults(true)
             .processingMetrics(true)
             .processingMetricsInterval(0.2f)
             .audioMetrics(true)