def save_result(wav_name: str, result: str) -> str:
    """Save `result` to a uniquely numbered .txt file under ./output.

    The file is named "<basename of wav_name>_<index>.txt", where index is
    the smallest positive integer whose file does not already exist.

    Args:
        wav_name: source audio file name; only its basename is used.
        result: text content to write (UTF-8).

    Returns:
        The path of the file that was written (the declared return value
        was missing in the original implementation).
    """
    # exist_ok=True is idempotent and replaces the isdir()/makedirs() pair.
    os.makedirs('output', exist_ok=True)
    index = 1
    while True:
        file_path = f'output/{os.path.basename(wav_name)}_{index}.txt'
        try:
            # Mode 'x' creates the file only if it does not already exist,
            # closing the TOCTOU race between os.path.exists() and open().
            with open(file_path, 'x', encoding="utf-8") as f:
                f.write(result)
            print(f"------------\n已將結果儲存到: {file_path}")
            return file_path
        except FileExistsError:
            index += 1
def convertduration(t):
    """Convert a duration in 100-nanosecond ticks to (seconds, milliseconds).

    The first element is an int count of whole seconds; the second is the
    remaining milliseconds as a float, matching the original's arithmetic.
    """
    total_milliseconds = t / 10000
    seconds, remainder_ms = divmod(total_milliseconds, 1000)
    return int(seconds), remainder_ms
##-- Code to Create Subtitle --#
# 3 Seconds
# Groups recognized words into srt.Subtitle entries of `bin` seconds each
# and writes the composed subtitles to subtitle.srt.
# NOTE(review): `bin` shadows the builtin and is set to 0, which makes the
# `< bin` branch below unreachable — every word immediately falls into the
# else branch. Given the "3 Seconds" comment, `bin = 3` was presumably
# intended; confirm against the desired subtitle window.
bin = 0
# Accumulated elapsed offset (in 100-ns ticks) within the current bin.
duration = 0
# Subtitle entries built so far.
transcriptions = []
# Sentence accumulated for the current bin.
transcript = ""
# NOTE(review): subtitle numbering and `prev` start at 5, not 0/1 — confirm
# whether that starting index is intentional.
index, prev = 5, 5
# Start time (seconds, microseconds arg of timedelta) of the current bin.
wordstartsec, wordstartmicrosec = 5, 5
# `speech_to_text_response` is expected to be a sequence of dicts carrying
# "Word" and "Offset" keys (Offset in 100-ns ticks) — defined elsewhere.
for i in range(len(speech_to_text_response)):
    # Forms the sentence until the bin size condition is met
    transcript = transcript + " " + speech_to_text_response[i]["Word"]
    # Checks whether the elapsed duration is less than the bin size
    if int((duration / 10000000)) < bin:
        # Still inside the current bin: track the latest word start and
        # accumulate elapsed ticks relative to the previous offset.
        wordstartsec, wordstartmicrosec = convertduration(speech_to_text_response[i]["Offset"])
        duration = duration + speech_to_text_response[i]["Offset"] - prev
        prev = speech_to_text_response[i]["Offset"]
        # transcript = transcript + " " + speech_to_text_response[i]["Word"]
    else:
        # Bin complete: emit a subtitle spanning [start, start + bin) and
        # reset the accumulators for the next bin.
        index = index + 1
        # transcript = transcript + " " + speech_to_text_response[i]["Word"]
        transcriptions.append(srt.Subtitle(index, datetime.timedelta(0, wordstartsec, wordstartmicrosec),
                                           datetime.timedelta(0, wordstartsec + bin, 0), transcript))
        duration = 0
        # print(transcript)
        transcript = ""
# Flush whatever remains in `transcript` as a final subtitle entry.
# NOTE(review): when the loop emitted a subtitle on its last iteration this
# appends an entry with an empty transcript — confirm that is acceptable.
transcriptions.append(srt.Subtitle(index, datetime.timedelta(0, wordstartsec, wordstartmicrosec),
                                   datetime.timedelta(0, wordstartsec + bin, 0), transcript))
subtitles = srt.compose(transcriptions)
with open("subtitle.srt", "w") as f:
    f.write(subtitles)
def translation_once_from_mic():
    """Performs one-shot speech recognition on input from the default microphone.

    NOTE(review): the original docstring said "from an audio file", but the
    audio config below explicitly uses the default microphone.
    """
    # <TranslationOnceWithMic>
    # Capture audio from the system's default microphone.
    audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)
    # Creates a speech recognizer using the microphone audio config;
    # `speech_config` is a module-level object defined elsewhere in the file.
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config, audio_config=audio_config)
    # Starts recognition, and returns after a single utterance is recognized. The end of a
    # single utterance is determined by listening for silence at the end or until a maximum of 15
    # seconds of audio is processed. It returns the recognized text.
    # Note: Since recognize_once() returns only a single utterance, it is suitable only for single
    # shot recognition like command or query.
    # For long-running multi-utterance recognition, use start_continuous_recognition() instead.
    print("請講一句話!")
    # `result_handler` is defined elsewhere in the file.
    result_handler(recognizer.recognize_once())
def translation_once_from_file(file_name):
    """Perform one-shot speech recognition on an audio file and save the result.

    Args:
        file_name: path to the audio file to recognize; also used to name
            the saved result file via save_result().
    """
    # <TranslationOnceWithFile>
    # Bug fix: `audio_config` was referenced below but never defined in this
    # function, so it raised NameError (or silently picked up an unrelated
    # global) and `file_name` was never used as the audio source. Build the
    # audio config from the file, mirroring translation_once_from_mic().
    audio_config = speechsdk.audio.AudioConfig(filename=file_name)
    # `speech_config` is a module-level object defined elsewhere in the file.
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config, audio_config=audio_config)
    # Starts recognition, and returns after a single utterance is recognized. The end of a
    # single utterance is determined by listening for silence at the end or until a maximum of 15
    # seconds of audio is processed. The task returns the recognition text as result.
    # Note: Since recognize_once() returns only a single utterance, it is suitable only for single
    # shot recognition like command or query.
    # For long-running multi-utterance recognition, use start_continuous_recognition() instead.
    # `result_handler` is defined elsewhere in the file.
    result = result_handler(recognizer.recognize_once())
    save_result(file_name, result)
def translation_continuous(file_name):
    """Perform continuous speech recognition on an audio file.

    Prints intermediate and final recognition events as they arrive and
    blocks until the session stops (end of file) or is canceled.

    Args:
        file_name: path to the audio file to recognize.
    """
    # <TranslationContinuous>
    # Bug fix: `audio_config` was referenced below but never defined in this
    # function; build it from the input file. (An unused `result_callback`
    # helper — a leftover that read `evt.result.translations`, which a plain
    # SpeechRecognizer result does not provide — was removed.)
    audio_config = speechsdk.audio.AudioConfig(filename=file_name)
    # `speech_config` is a module-level object defined elsewhere in the file.
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config, audio_config=audio_config)

    done = False

    def stop_cb(evt):
        """callback that signals to stop continuous recognition upon receiving an event `evt`"""
        print(f'CLOSING on {evt}')
        nonlocal done
        done = True

    # Connect callbacks to the events fired by the speech recognizer
    recognizer.recognizing.connect(lambda evt: print('RECOGNIZING: {}'.format(evt)))
    recognizer.recognized.connect(lambda evt: print('RECOGNIZED: {}'.format(evt)))
    recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
    recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
    recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))
    # stop continuous recognition on either session stopped or canceled events
    recognizer.session_stopped.connect(stop_cb)
    recognizer.canceled.connect(stop_cb)
    # Start continuous speech recognition
    recognizer.start_continuous_recognition()
    while not done:
        time.sleep(.5)
    # Shut the recognizer down cleanly once the session has ended — the
    # original never stopped recognition after the wait loop.
    recognizer.stop_continuous_recognition()