Skip to content

feat: added an example with speech contexts #56

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion python/snippets/stt_streaming_recognize_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ def print_streaming_recognition_responses(responses):
print('"' + alternative.transcript + '"')
print("------------------")

# The request metadata is built from the API key and secret key, so the
# channel must be encrypted: use TLS rather than grpc.insecure_channel(),
# which would send that metadata in plaintext and cannot talk to a TLS port.
channel = grpc.secure_channel(endpoint, grpc.ssl_channel_credentials())
stub = stt_pb2_grpc.SpeechToTextStub(channel)
metadata = authorization_metadata(api_key, secret_key, "tinkoff.cloud.stt")
# Stream the sample three times and print every recognition result.
responses = stub.StreamingRecognize(generate_repeated_requests(3), metadata=metadata)
print_streaming_recognition_responses(responses)
71 changes: 71 additions & 0 deletions python/snippets/stt_streaming_recognize_context_dictionaries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/usr/bin/env python3

import sys
sys.path.append("..")

from tinkoff.cloud.stt.v1 import stt_pb2_grpc, stt_pb2
from auth import authorization_metadata
import grpc
import os
import wave

# Service address: VOICEKIT_ENDPOINT if set to a non-empty value,
# otherwise the default "api.tinkoff.ai:443".
endpoint = os.environ.get("VOICEKIT_ENDPOINT") or "api.tinkoff.ai:443"
# Credentials are mandatory: a missing variable raises KeyError on startup.
api_key = os.environ["VOICEKIT_API_KEY"]
secret_key = os.environ["VOICEKIT_SECRET_KEY"]

def build_first_request(sample_rate_hertz, num_channels, context):
    """Build the configuration request that opens a streaming session.

    Args:
        sample_rate_hertz: Sample rate of the audio that will follow.
        num_channels: Number of channels in that audio.
        context: A ``stt_pb2.SpeechContext`` appended to the config's
            ``speech_contexts`` list.

    Returns:
        A ``stt_pb2.StreamingRecognizeRequest`` with ``streaming_config``
        filled in for LINEAR16 audio.
    """
    first_request = stt_pb2.StreamingRecognizeRequest()
    recognition_config = first_request.streaming_config.config
    recognition_config.encoding = stt_pb2.AudioEncoding.LINEAR16
    recognition_config.sample_rate_hertz = sample_rate_hertz
    recognition_config.num_channels = num_channels
    # IMPORTANT!
    # Not recommended:
    # - adding words shorter than 5 characters
    # - setting score to too large a value
    recognition_config.speech_contexts.append(context)
    return first_request

def generate_requests(context):
    """Yield the request stream for one pass over the sample recording.

    The first request carries the recognition config (including ``context``);
    it is followed by the audio of ``../../audio/numbers.wav`` in 100 ms
    chunks and then by one second of silence.

    Args:
        context: A ``stt_pb2.SpeechContext`` passed on to
            ``build_first_request``.

    Yields:
        ``stt_pb2.StreamingRecognizeRequest`` messages.

    Raises:
        Exception: Any error raised while reading the file or building a
            request is printed and then re-raised unchanged.
    """
    try:
        with wave.open("../../audio/numbers.wav") as f:
            yield build_first_request(f.getframerate(), f.getnchannels(), context)
            frame_samples = f.getframerate() // 10  # Send 100ms at a time
            for data in iter(lambda: f.readframes(frame_samples), b''):
                request = stt_pb2.StreamingRecognizeRequest()
                request.audio_content = data
                yield request
            # Sending 1 second of silence (10 chunks of 100 ms).
            # readframes() counts frames, not bytes, so a silent chunk of the
            # same duration needs frame_samples * bytes-per-frame zero bytes.
            silence_chunk = bytes(frame_samples * f.getsampwidth() * f.getnchannels())
            for _ in range(10):
                request = stt_pb2.StreamingRecognizeRequest()
                request.audio_content = silence_chunk
                yield request
    except Exception as e:
        print("Got exception in generate_requests", e)
        raise

def generate_repeated_requests(times_repeated):
    """Yield the request stream of ``generate_requests`` several times over.

    Even-numbered passes (0, 2, ...) use an empty speech context and
    odd-numbered passes use the "numbers" speech context dictionary.

    Args:
        times_repeated: How many passes to generate.

    Yields:
        Every request produced by each pass, in order.
    """
    dictionary_context = stt_pb2.SpeechContext(speech_context_dictionary_id="numbers")
    empty_context = stt_pb2.SpeechContext(phrases=[])
    # A context may be specified several times during a streaming session.
    # This example alternates between adding the context and resetting it.
    # Note that resetting the context requires sending a SpeechContext with
    # an empty list of phrases.
    for pass_index in range(times_repeated):
        if pass_index % 2 == 1:
            chosen_context = dictionary_context
        else:
            chosen_context = empty_context
        yield from generate_requests(chosen_context)


def print_streaming_recognition_responses(responses):
    """Print every recognition result found in a stream of responses.

    For each result this prints its channel, the phrase start and end times
    (converted with ``ToTimedelta()``), each alternative transcript wrapped
    in double quotes, and a separator line.

    Args:
        responses: Iterable of response messages, each exposing ``results``.
    """
    for response in responses:
        for result in response.results:
            recognized = result.recognition_result
            phrase_start = recognized.start_time.ToTimedelta()
            print("Channel", recognized.channel)
            print("Phrase start:", phrase_start)
            print("Phrase end: ", recognized.end_time.ToTimedelta())
            for alternative in recognized.alternatives:
                print(f'"{alternative.transcript}"')
            print("------------------")

# The default endpoint (api.tinkoff.ai:443) is a TLS port and the request
# metadata is built from the API key and secret key, so the channel must be
# encrypted. grpc.insecure_channel() would send that metadata in plaintext.
channel = grpc.secure_channel(endpoint, grpc.ssl_channel_credentials())
stub = stt_pb2_grpc.SpeechToTextStub(channel)
metadata = authorization_metadata(api_key, secret_key, "tinkoff.cloud.stt")
# Three passes: without context, with the "numbers" dictionary, without context.
responses = stub.StreamingRecognize(generate_repeated_requests(3), metadata=metadata)
print_streaming_recognition_responses(responses)
182 changes: 62 additions & 120 deletions python/tinkoff/cloud/stt/v1/stt_pb2_grpc.py
Original file line number Diff line number Diff line change
@@ -1,139 +1,81 @@
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc

from tinkoff.cloud.longrunning.v1 import longrunning_pb2 as tinkoff_dot_cloud_dot_longrunning_dot_v1_dot_longrunning__pb2
from tinkoff.cloud.stt.v1 import stt_pb2 as tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2


class SpeechToTextStub(object):
"""Speech recognition.
"""

def __init__(self, channel):
"""Constructor.

Args:
channel: A grpc.Channel.
"""
self.Recognize = channel.unary_unary(
'/tinkoff.cloud.stt.v1.SpeechToText/Recognize',
request_serializer=tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.RecognizeRequest.SerializeToString,
response_deserializer=tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.RecognizeResponse.FromString,
)
self.StreamingRecognize = channel.stream_stream(
'/tinkoff.cloud.stt.v1.SpeechToText/StreamingRecognize',
request_serializer=tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.StreamingRecognizeRequest.SerializeToString,
response_deserializer=tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.StreamingRecognizeResponse.FromString,
)
self.LongRunningRecognize = channel.unary_unary(
'/tinkoff.cloud.stt.v1.SpeechToText/LongRunningRecognize',
request_serializer=tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.LongRunningRecognizeRequest.SerializeToString,
response_deserializer=tinkoff_dot_cloud_dot_longrunning_dot_v1_dot_longrunning__pb2.Operation.FromString,
)
"""Speech recognition
"""

def __init__(self, channel):
"""Constructor.

class SpeechToTextServicer(object):
"""Speech recognition.
Args:
channel: A grpc.Channel.
"""
self.Recognize = channel.unary_unary(
'/tinkoff.cloud.stt.v1.SpeechToText/Recognize',
request_serializer=tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.RecognizeRequest.SerializeToString,
response_deserializer=tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.RecognizeResponse.FromString,
)
self.StreamingRecognize = channel.stream_stream(
'/tinkoff.cloud.stt.v1.SpeechToText/StreamingRecognize',
request_serializer=tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.StreamingRecognizeRequest.SerializeToString,
response_deserializer=tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.StreamingRecognizeResponse.FromString,
)
self.LongRunningRecognize = channel.unary_unary(
'/tinkoff.cloud.stt.v1.SpeechToText/LongRunningRecognize',
request_serializer=tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.LongRunningRecognizeRequest.SerializeToString,
response_deserializer=tinkoff_dot_cloud_dot_longrunning_dot_v1_dot_longrunning__pb2.Operation.FromString,
)

def Recognize(self, request, context):
"""Method to recognize whole audio at once: sending complete audio, getting complete recognition result.
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')

def StreamingRecognize(self, request_iterator, context):
"""Method for streaming recognition.
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')

def LongRunningRecognize(self, request, context):
"""Method to create longrunning recognition operation. Created operation will persist for a limited time and will be deleted after that time has expired.
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')

class SpeechToTextServicer(object):
"""Speech recognition
"""

def add_SpeechToTextServicer_to_server(servicer, server):
rpc_method_handlers = {
'Recognize': grpc.unary_unary_rpc_method_handler(
servicer.Recognize,
request_deserializer=tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.RecognizeRequest.FromString,
response_serializer=tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.RecognizeResponse.SerializeToString,
),
'StreamingRecognize': grpc.stream_stream_rpc_method_handler(
servicer.StreamingRecognize,
request_deserializer=tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.StreamingRecognizeRequest.FromString,
response_serializer=tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.StreamingRecognizeResponse.SerializeToString,
),
'LongRunningRecognize': grpc.unary_unary_rpc_method_handler(
servicer.LongRunningRecognize,
request_deserializer=tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.LongRunningRecognizeRequest.FromString,
response_serializer=tinkoff_dot_cloud_dot_longrunning_dot_v1_dot_longrunning__pb2.Operation.SerializeToString,
),
}
generic_handler = grpc.method_handlers_generic_handler(
'tinkoff.cloud.stt.v1.SpeechToText', rpc_method_handlers)
server.add_generic_rpc_handlers((generic_handler,))

def Recognize(self, request, context):
"""Method to recognize whole audio at once: sending complete audio, getting complete recognition result.
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')

# This class is part of an EXPERIMENTAL API.
class SpeechToText(object):
"""Speech recognition.
def StreamingRecognize(self, request_iterator, context):
"""Method for streaming recognition.
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')

@staticmethod
def Recognize(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/tinkoff.cloud.stt.v1.SpeechToText/Recognize',
tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.RecognizeRequest.SerializeToString,
tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.RecognizeResponse.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
def LongRunningRecognize(self, request, context):
"""Method to create longrunning recognition operation. Created operation will persist for a limited time and will be deleted after that time has expired.
"""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')

@staticmethod
def StreamingRecognize(request_iterator,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.stream_stream(request_iterator, target, '/tinkoff.cloud.stt.v1.SpeechToText/StreamingRecognize',
tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.StreamingRecognizeRequest.SerializeToString,
tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.StreamingRecognizeResponse.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

@staticmethod
def LongRunningRecognize(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/tinkoff.cloud.stt.v1.SpeechToText/LongRunningRecognize',
tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.LongRunningRecognizeRequest.SerializeToString,
tinkoff_dot_cloud_dot_longrunning_dot_v1_dot_longrunning__pb2.Operation.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
def add_SpeechToTextServicer_to_server(servicer, server):
rpc_method_handlers = {
'Recognize': grpc.unary_unary_rpc_method_handler(
servicer.Recognize,
request_deserializer=tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.RecognizeRequest.FromString,
response_serializer=tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.RecognizeResponse.SerializeToString,
),
'StreamingRecognize': grpc.stream_stream_rpc_method_handler(
servicer.StreamingRecognize,
request_deserializer=tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.StreamingRecognizeRequest.FromString,
response_serializer=tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.StreamingRecognizeResponse.SerializeToString,
),
'LongRunningRecognize': grpc.unary_unary_rpc_method_handler(
servicer.LongRunningRecognize,
request_deserializer=tinkoff_dot_cloud_dot_stt_dot_v1_dot_stt__pb2.LongRunningRecognizeRequest.FromString,
response_serializer=tinkoff_dot_cloud_dot_longrunning_dot_v1_dot_longrunning__pb2.Operation.SerializeToString,
),
}
generic_handler = grpc.method_handlers_generic_handler(
'tinkoff.cloud.stt.v1.SpeechToText', rpc_method_handlers)
server.add_generic_rpc_handlers((generic_handler,))
Loading