語音服務－語音轉換文字範例(from-file code)

第 12 屆 iThome 鐵人賽

DAY 27

Microsoft Azure

飛上雲端－Azure系列第 27 篇

12th鐵人賽 azure microsoft azure azure cognitive services 語音服務

Chao_Hsu

2020-10-12 23:36:20

3345 瀏覽

分享至

延續昨天，今天就來看看範例中是怎麼呼叫API及使用SDK吧！(打開index.html及token.php)

今天html及DOM的部分就先不理會

<!-- Speech SDK reference sdk. -->
<script src="microsoft.cognitiveservices.speech.sdk.bundle.js"></script>

首先引入了SDK中的js檔
接著這個function會向token.php發出請求，由token.php呼叫API以取得token
這個function會在下面使用SDK的程式碼最後被呼叫

<!-- Speech SDK Authorization token -->
  <script>
  // Note: Replace the URL with a valid endpoint to retrieve
  //       authorization tokens for your subscription.
  var authorizationEndpoint = "token.php";

  /**
   * Requests an authorization token from `authorizationEndpoint` and, on
   * success, stores it for the Speech SDK code to use.
   *
   * The response body is expected to be a JWT-style token
   * (header.payload.signature). Its payload is decoded to read the `region`
   * claim, which is written to `serviceRegion.value`; the raw token is stored
   * in the global `authorizationToken`, and the subscription key input is
   * disabled because the token replaces it.
   *
   * Does nothing when `authorizationEndpoint` is empty. Failures (network
   * error, non-2xx status, malformed token) are logged and leave the page in
   * subscription-key mode instead of throwing inside the event handler.
   */
  function RequestAuthorizationToken() {
    if (!authorizationEndpoint) {
      return;
    }

    // JWT segments are base64url encoded ("-" and "_" instead of "+" and "/"),
    // which atob() rejects, so translate to the standard alphabet first.
    function decodeTokenPayload(jwt) {
      var segment = jwt.split(".")[1];
      if (!segment) {
        throw new Error("Authorization token has no payload segment");
      }
      var base64 = segment.replace(/-/g, "+").replace(/_/g, "/");
      return JSON.parse(atob(base64));
    }

    var request = new XMLHttpRequest();
    request.open("GET", authorizationEndpoint);

    // Handlers are attached before send() so no event can be missed.
    request.onload = function () {
      if (this.status < 200 || this.status >= 300) {
        console.error("Authorization token request failed with HTTP status " + this.status);
        return;
      }

      // Trim stray whitespace/newlines the endpoint may emit around the token.
      var rawToken = this.responseText.trim();
      var token;
      try {
        token = decodeTokenPayload(rawToken);
      } catch (err) {
        console.error("Could not decode authorization token: " + err);
        return;
      }

      serviceRegion.value = token.region;
      authorizationToken = rawToken;
      subscriptionKey.disabled = true;
      subscriptionKey.value = "using authorization token (hit F5 to refresh)";
      // Stringify the payload; concatenating the object would print "[object Object]".
      console.log("Got an authorization token: " + JSON.stringify(token));
    };
    request.onerror = function () {
      console.error("Authorization token request to " + authorizationEndpoint + " failed");
    };

    // A GET request carries no body, so no Content-Type header is needed.
    request.send();
  }
  </script>

authorizationEndpoint這個變數要放的是token.php的路徑
(如果是放在同一個資料夾的同一層就無需更改範例程式碼)
function RequestAuthorizationToken中
透過XMLHttpRequest以GET方式請求token.php，並將取得的token
放到authorizationToken這個變數
同時把token中的區域(region)放到serviceRegion.value

再來便要使用SDK了

// Speech SDK USAGE
  
    // status fields and start button in UI
    var phraseDiv;
    var startRecognizeOnceAsyncButton;

    // subscription key and region for speech services.
    var subscriptionKey, serviceRegion;
    // Set by RequestAuthorizationToken() when a token endpoint is available.
    var authorizationToken;
    var SpeechSDK;
    var recognizer;

    // Wire up the UI once the DOM is ready: look up the elements, register the
    // click handler that runs a single recognition, and enable the page if the
    // Speech SDK bundle was loaded.
    document.addEventListener("DOMContentLoaded", function () {
      startRecognizeOnceAsyncButton = document.getElementById("startRecognizeOnceAsyncButton");
      subscriptionKey = document.getElementById("subscriptionKey");
      serviceRegion = document.getElementById("serviceRegion");
      phraseDiv = document.getElementById("phraseDiv");

      // Shared cleanup for both recognition outcomes: make the button usable
      // again and release the recognizer.
      function finishRecognition() {
        startRecognizeOnceAsyncButton.disabled = false;
        if (recognizer) {
          recognizer.close();
          recognizer = undefined;
        }
      }

      startRecognizeOnceAsyncButton.addEventListener("click", function () {
        // Prevent a second recognition from starting while one is running.
        startRecognizeOnceAsyncButton.disabled = true;
        phraseDiv.textContent = "";

        // if we got an authorization token, use the token. Otherwise use the provided subscription key
        var speechConfig;
        if (authorizationToken) {
          speechConfig = SpeechSDK.SpeechConfig.fromAuthorizationToken(authorizationToken, serviceRegion.value);
        } else {
          if (subscriptionKey.value === "" || subscriptionKey.value === "subscription") {
            alert("Please enter your Microsoft Cognitive Services Speech subscription key!");
            // Re-enable the button; otherwise the user could not retry after
            // entering a key without reloading the page.
            startRecognizeOnceAsyncButton.disabled = false;
            return;
          }
          speechConfig = SpeechSDK.SpeechConfig.fromSubscription(subscriptionKey.value, serviceRegion.value);
        }

        speechConfig.speechRecognitionLanguage = "zh-TW";
        var audioConfig  = SpeechSDK.AudioConfig.fromDefaultMicrophoneInput();
        recognizer = new SpeechSDK.SpeechRecognizer(speechConfig, audioConfig);

        recognizer.recognizeOnceAsync(
          function (result) {
            // Use textContent so recognized text is never parsed as HTML.
            // result.text may be undefined (e.g. nothing was recognized);
            // skip it rather than appending the string "undefined".
            if (result.text) {
              phraseDiv.textContent += result.text;
            }
            window.console.log(result);

            finishRecognition();
          },
          function (err) {
            phraseDiv.textContent += err;
            window.console.log(err);

            finishRecognition();
          });
      });

      // Only reveal the UI when the SDK bundle has been loaded.
      if (!!window.SpeechSDK) {
        SpeechSDK = window.SpeechSDK;
        startRecognizeOnceAsyncButton.disabled = false;

        document.getElementById('content').style.display = 'block';
        document.getElementById('warning').style.display = 'none';

        // in case we have a function for getting an authorization token, call it.
        if (typeof RequestAuthorizationToken === "function") {
            RequestAuthorizationToken();
        }
      }
    });

一開始宣告了幾個變數
分別用來放UI介面的DOM元素以及API所需的key, region(上面有用到)以及SDK的物件等

// UI elements: the output field for recognized text and the start button.
    var phraseDiv;
    var startRecognizeOnceAsyncButton;

    // Input elements holding the Speech subscription key and service region.
    var subscriptionKey;
    var serviceRegion;
    // Authorization token used in place of the subscription key when present.
    var authorizationToken;
    // Reference to the Speech SDK namespace object.
    var SpeechSDK;
    // Recognizer instance for the recognition currently in progress.
    var recognizer;

接著如果成功拿到授權的token
就利用SDK的fromAuthorizationToken()建立speechConfig；否則改用輸入的訂閱金鑰(subscription key)，以fromSubscription()建立

// if we got an authorization token, use the token. Otherwise use the provided subscription key
        var speechConfig;
        if (authorizationToken) {
          speechConfig = SpeechSDK.SpeechConfig.fromAuthorizationToken(authorizationToken, serviceRegion.value);
        } else {
          if (subscriptionKey.value === "" || subscriptionKey.value === "subscription") {
            alert("Please enter your Microsoft Cognitive Services Speech subscription key!");
            return;
          }
          speechConfig = SpeechSDK.SpeechConfig.fromSubscription(subscriptionKey.value, serviceRegion.value);
        }

然後設定辨識的語言，預設為美國地區的英文
要改成中文的話可以改為"zh-TW"
其他支援的語言代碼請參考https://docs.microsoft.com/zh-tw/azure/cognitive-services/speech-service/language-support
目前還沒找到怎麼混著不同語言的方法

// Set the recognition language to "zh-TW" instead of relying on the default.
speechConfig.speechRecognitionLanguage = "zh-TW";

然後音訊的config則來自SDK的fromDefaultMicrophoneInput()
也就是使用電腦預設的麥克風

// Take audio input from the default microphone.
var audioConfig  = SpeechSDK.AudioConfig.fromDefaultMicrophoneInput();

兩個config都設定完成，就可以來初始化辨識器了

// Create the recognizer from the speech and audio configurations built above.
recognizer = new SpeechSDK.SpeechRecognizer(speechConfig, audioConfig);

範例中使用的是非同步的一次性辨識
在偵測到一段沉默時即停止辨識
在這段程式碼中，會將結果的文字放到網頁頁面的文字框中

// Run a single recognition. The first callback receives the result, the second
// receives an error; both re-enable the start button and release the recognizer.
recognizer.recognizeOnceAsync(
          function (result) {
            startRecognizeOnceAsyncButton.disabled = false;
            // Use textContent so recognized text is never parsed as HTML.
            // result.text may be undefined (e.g. nothing was recognized);
            // skip it rather than appending the string "undefined".
            if (result.text) {
              phraseDiv.textContent += result.text;
            }
            window.console.log(result);

            recognizer.close();
            recognizer = undefined;
          },
          function (err) {
            startRecognizeOnceAsyncButton.disabled = false;
            phraseDiv.textContent += err;
            window.console.log(err);

            recognizer.close();
            recognizer = undefined;
          });