Skip to content

Commit

Permalink
feat(stt): readd interimResults and lowLatency wss params
Browse files Browse the repository at this point in the history
  • Loading branch information
apaparazzi0329 committed Dec 5, 2024
1 parent e73ef5a commit 6696356
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,9 @@ public interface Model {
protected Boolean splitTranscriptAtPhraseEnd;
protected Float speechDetectorSensitivity;
protected Float backgroundAudioSuppression;
protected Boolean lowLatency;
protected Float characterInsertionBias;
private Boolean interimResults;
private Boolean processingMetrics;
private Float processingMetricsInterval;

Expand Down Expand Up @@ -232,7 +234,9 @@ public static class Builder {
private Boolean splitTranscriptAtPhraseEnd;
private Float speechDetectorSensitivity;
private Float backgroundAudioSuppression;
private Boolean lowLatency;
private Float characterInsertionBias;
private Boolean interimResults;
private Boolean processingMetrics;
private Float processingMetricsInterval;

Expand Down Expand Up @@ -262,7 +266,9 @@ private Builder(RecognizeWithWebsocketsOptions recognizeWithWebsocketsOptions) {
this.splitTranscriptAtPhraseEnd = recognizeWithWebsocketsOptions.splitTranscriptAtPhraseEnd;
this.speechDetectorSensitivity = recognizeWithWebsocketsOptions.speechDetectorSensitivity;
this.backgroundAudioSuppression = recognizeWithWebsocketsOptions.backgroundAudioSuppression;
this.lowLatency = recognizeWithWebsocketsOptions.lowLatency;
this.characterInsertionBias = recognizeWithWebsocketsOptions.characterInsertionBias;
this.interimResults = recognizeWithWebsocketsOptions.interimResults;
this.processingMetrics = recognizeWithWebsocketsOptions.processingMetrics;
this.processingMetricsInterval = recognizeWithWebsocketsOptions.processingMetricsInterval;
}
Expand Down Expand Up @@ -578,6 +584,17 @@ public Builder backgroundAudioSuppression(Float backgroundAudioSuppression) {
return this;
}

/**
 * Set the lowLatency.
 *
 * <p>Stores the given value on this builder. For what the option means, see the Javadoc on
 * {@link RecognizeWithWebsocketsOptions#lowLatency()}.
 *
 * @param lowLatency the lowLatency
 * @return the RecognizeWithWebsocketsOptions builder
 */
public Builder lowLatency(Boolean lowLatency) {
this.lowLatency = lowLatency;
return this;
}

/**
* Set the characterInsertionBias.
*
Expand All @@ -589,6 +606,19 @@ public Builder characterInsertionBias(Float characterInsertionBias) {
return this;
}

/**
 * Set the interimResults.
 *
 * <p>Stores the given value on this builder.
 *
 * <p>NOTE: This parameter only works for the `recognizeUsingWebSocket` method.
 *
 * @param interimResults the interimResults
 * @return the RecognizeWithWebsocketsOptions builder
 */
public Builder interimResults(Boolean interimResults) {
this.interimResults = interimResults;
return this;
}

/**
* Set the audio.
*
Expand Down Expand Up @@ -655,7 +685,9 @@ protected RecognizeWithWebsocketsOptions(Builder builder) {
splitTranscriptAtPhraseEnd = builder.splitTranscriptAtPhraseEnd;
speechDetectorSensitivity = builder.speechDetectorSensitivity;
backgroundAudioSuppression = builder.backgroundAudioSuppression;
lowLatency = builder.lowLatency;
characterInsertionBias = builder.characterInsertionBias;
interimResults = builder.interimResults;
processingMetrics = builder.processingMetrics;
processingMetricsInterval = builder.processingMetricsInterval;
}
Expand Down Expand Up @@ -1091,6 +1123,28 @@ public Float backgroundAudioSuppression() {
return backgroundAudioSuppression;
}

/**
 * Gets the lowLatency.
 *
 * <p>If `true` for next-generation `Multimedia` and `Telephony` models that support low latency,
 * directs the service to produce results even more quickly than it usually does. Next-generation
 * models produce transcription results faster than previous-generation models. The `low_latency`
 * parameter causes the models to produce results even more quickly, though the results might be
 * less accurate when the parameter is used.
 *
 * <p>The parameter is not available for previous-generation `Broadband` and `Narrowband` models.
 * It is available for most next-generation models.
 *
 * <ul>
 *   <li>For a list of next-generation models that support low latency, see [Supported
 *       next-generation language
 *       models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng#models-ng-supported).
 *   <li>For more information about the `low_latency` parameter, see [Low
 *       latency](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-interim#low-latency).
 * </ul>
 *
 * @return the lowLatency
 */
public Boolean lowLatency() {
return lowLatency;
}

/**
* Gets the characterInsertionBias.
*
Expand Down Expand Up @@ -1122,6 +1176,21 @@ public Float characterInsertionBias() {
return characterInsertionBias;
}

/**
 * Gets the interimResults.
 *
 * <p>If `true`, the service returns interim results as a stream of `SpeechRecognitionResults`
 * objects. By default, the service returns a single `SpeechRecognitionResults` object with final
 * results only.
 *
 * <p>NOTE: This parameter only works for the `recognizeUsingWebSocket` method.
 *
 * <p>The value returned is the one copied from the builder when these options were constructed.
 *
 * @return the interimResults
 */
public Boolean interimResults() {
return interimResults;
}

/**
* Gets the processingMetrics.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ public void testRecognizeWebSocket() throws FileNotFoundException, InterruptedEx
.wordAlternativesThreshold(0.5f)
.model(EN_BROADBAND16K)
.contentType(HttpMediaType.AUDIO_WAV)
.interimResults(true)
.processingMetrics(true)
.processingMetricsInterval(0.2f)
.audioMetrics(true)
Expand Down

0 comments on commit 6696356

Please sign in to comment.