這篇會針對韓文翻譯機器人的功能,整合 Azure 的OCR、翻譯和文字轉換語音的工具,分別針對韓文的文字和含有韓文的圖片,進行翻譯並提供發音音檔。處理的流程分別如下:
上傳config.json
到 Azure Web App,詳情可看Chatbot integration- 看圖學英文的說明。
config.json
{
"line": {
"line_secret": "your line secret",
"line_token": "your line token",
},
"azure": {
"cv_key": "your subscription key of computer vision",
"cv_end": "your endpoint of computer vision",
"blob_connect": "your connect string",
"blob_container": "your blob container name",
"trans_key": "your subscription key of translator",
"speech_key": "your subscription key of speech"
}
python
套件requirements.txt
Flask==1.0.2
line-bot-sdk
azure-cognitiveservices-vision-computervision
azure-cognitiveservices-speech
azure-storage-blob
Pillow
langdetect
requests
import os
import json
import time
import requests
from flask import Flask, request, abort
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from azure.storage.blob import BlobServiceClient
from azure.cognitiveservices.speech import (
SpeechConfig,
SpeechSynthesizer,
)
from azure.cognitiveservices.speech.audio import AudioOutputConfig
from msrest.authentication import CognitiveServicesCredentials
from linebot import LineBotApi, WebhookHandler
from linebot.exceptions import InvalidSignatureError
from linebot.models import (
MessageEvent,
TextMessage,
TextSendMessage,
FlexSendMessage,
ImageMessage,
)
from PIL import Image
from langdetect import detect
app = Flask(__name__)
CONFIG = json.load(open("/home/config.json", "r"))
# 取得電腦視覺的用戶權限
SUBSCRIPTION_KEY = CONFIG["azure"]["cv_key"]
ENDPOINT = CONFIG["azure"]["cv_end"]
CV_CLIENT = ComputerVisionClient(
ENDPOINT, CognitiveServicesCredentials(SUBSCRIPTION_KEY)
)
# 連結 blob service
CONNECT_STR = CONFIG["azure"]["blob_connect"]
CONTAINER = CONFIG["azure"]["blob_container"]
BLOB_SERVICE = BlobServiceClient.from_connection_string(CONNECT_STR)
# 取得翻譯工具的金鑰
TRANS_KEY = CONFIG["azure"]["trans_key"]
# 設定 Azure 語音的 config
SPEECH_KEY = CONFIG["azure"]["speech_key"]
SPEECH_CONFIG = SpeechConfig(subscription=SPEECH_KEY, region="eastus2")
SPEECH_CONFIG.speech_synthesis_language = "ko-KR"
LINE_SECRET = CONFIG["line"]["line_secret"]
LINE_TOKEN = CONFIG["line"]["line_token"]
LINE_BOT = LineBotApi(LINE_TOKEN)
HANDLER = WebhookHandler(LINE_SECRET)
@app.route("/")
def hello():
"hello world"
return "Hello World!!!!!"
# 上傳檔案到 Azure blob
def upload_blob(container, path):
"""
Upload files to Azure blob
"""
blob_client = BLOB_SERVICE.get_blob_client(container=container, blob=path)
with open(path, "rb") as data:
blob_client.upload_blob(data, overwrite=True)
data.close()
return blob_client.url
# 透過 Azure 電腦視覺執行 OCR
def azure_ocr(url):
"""
Azure OCR: get characters from image url
"""
ocr_results = CV_CLIENT.read(url, raw=True)
operation_location_remote = ocr_results.headers["Operation-Location"]
operation_id = operation_location_remote.split("/")[-1]
while True:
get_handw_text_results = CV_CLIENT.get_read_result(operation_id)
if get_handw_text_results.status not in ["notStarted", "running"]:
break
time.sleep(1)
text = []
if get_handw_text_results.status == OperationStatusCodes.succeeded:
for text_result in get_handw_text_results.analyze_result.read_results:
for line in text_result.lines:
text.append(line.text)
if len(text) == 0:
return text
return []
# 將字串翻譯成中文
def azure_translation(string, message_id):
"""
Translation with azure API
"""
trans_url = "https://api.cognitive.microsofttranslator.com/translate"
params = {"api-version": "2.0", "to": ["zh-Hant"]}
headers = {
"Ocp-Apim-Subscription-Key": TRANS_KEY,
"Content-type": "application/json",
"Ocp-Apim-Subscription-Region": "eastus2",
}
body = [{"text": string}]
req = requests.post(trans_url, params=params, headers=headers, json=body)
response = req.json()
output = ""
speech_button = ""
ans = []
for i in response:
ans.append(i["translations"][0]["text"])
language = response[0]["detectedLanguage"]["language"]
# 如果是韓文的話,就透過 Azure Speech 取得發音
if language == "ko":
output = " ".join(string) + "\n" + " ".join(ans)
speech_button = azure_speech(string, message_id)
return output, speech_button
# 將字串轉換成音訊檔,並且上傳到 Azure blob
def azure_speech(string, message_id):
"""
Azure speech: text to speech, and save wav file to azure blob
"""
file_name = "{}.wav".format(message_id)
audio_config = AudioOutputConfig(filename=file_name)
synthesizer = SpeechSynthesizer(
speech_config=SPEECH_CONFIG, audio_config=audio_config
)
synthesizer.speak_text_async(string)
# 上傳 Azure blob,並取得 URL
link = upload_blob(CONTAINER, file_name)
# 將 URL 包裝成 Flex message的按扭,以便最後輸出
output = {
"type": "button",
"flex": 2,
"style": "primary",
"color": "#1E90FF",
"action": {"type": "uri", "label": "Voice", "uri": link},
"height": "sm",
}
os.remove(file_name)
return output
# 為了跟 Line platform 溝通的 webhook
@app.route("/callback", methods=["POST"])
def callback():
"""
LINE bot webhook callback
"""
# get X-Line-Signature header value
signature = request.headers["X-Line-Signature"]
print(signature)
body = request.get_data(as_text=True)
print(body)
try:
HANDLER.handle(body, signature)
except InvalidSignatureError:
print(
"Invalid signature. Please check your channel access token/channel secret."
)
abort(400)
return "OK"
# 如果傳給 chatbot 的訊息是文字,做以下處理
@HANDLER.add(MessageEvent, message=TextMessage)
def handle_message(event):
"""
Reply text message
"""
with open("templates/detect_result.json", "r") as f_h:
bubble = json.load(f_h)
f_h.close()
# 利用 langdetect 此套件的 detect 判斷是否為韓文
if detect(event.message.text) == "ko":
output, speech_button = azure_translation(event.message.text, event.message.id)
# header 是放圖片的部分,沒有圖片的話,就先去除
bubble.pop("header")
# 放入翻譯的結果
bubble["body"]["contents"][0]["text"] = output
# 放入語音連結的按鈕
bubble["body"]["contents"].append(speech_button)
# 調整 body 的高度
bubble["body"]["height"] = "{}px".format(150)
message = FlexSendMessage(alt_text="Report", contents=bubble)
else:
message = TextSendMessage(text=event.message.text)
LINE_BOT.reply_message(event.reply_token, message)
# 如果傳給 chatbot 的訊息是圖片,做以下處理
@HANDLER.add(MessageEvent, message=ImageMessage)
def handle_content_message(event):
"""
Reply Image message with results of image description and objection detection
"""
print(event.message)
print(event.source.user_id)
print(event.message.id)
with open("templates/detect_result.json", "r") as f_h:
bubble = json.load(f_h)
f_h.close()
filename = "{}.jpg".format(event.message.id)
message_content = LINE_BOT.get_message_content(event.message.id)
with open(filename, "wb") as f_h:
for chunk in message_content.iter_content():
f_h.write(chunk)
f_h.close()
img = Image.open(filename)
link = upload_blob(CONTAINER, filename)
text = azure_ocr(link)
output, speech_button = azure_translation(" ".join(text), event.message.id)
# 整合圖片、翻譯結果和音訊按鈕到 flex message
bubble["body"]["contents"].append(speech_button)
bubble["body"]["height"] = "{}px".format(150)
bubble["body"]["contents"][0]["text"] = output
bubble["header"]["contents"][0]["url"] = link
bubble["header"]["contents"][0]["aspectRatio"] = "{}:{}".format(
img.size[0], img.size[1]
)
LINE_BOT.reply_message(
event.reply_token, [FlexSendMessage(alt_text="Report", contents=bubble)]
)
最後應該會得到如下圖的效果,這個圖片是從一本年代久遠的韓文講義找到的內容。有趣的是,OCR 有把咖啡杯上的 coffee 辨識出來,轉換成語音時,也用韓文發音 coffee ,跟커피發音相同,聽起來比較像 "kopee"。發音也與真人無異,效果可以參照此影片:https://youtu.be/AC154CVsLZ4
接下來,我們試試人臉辨識,明天見。