iT邦幫忙

4

Google Docs Engine 實戰 忍の道徽章排行


這篇是 使用 Google Docs 打造 Wep App Engine 的延續,
目的是做個實例,讓大家能夠更瞭解它的運作方式!
以下是實作[忍の道徽章排行]的思路:
1.首先從 這裡 分析所有參加忍の道活動的ID、暱稱,存進 FusionTables。
2.再透過 http://ithelp.ithome.com.tw/js-ninja/sticker-badges/user/**ID** 統計每個人的徽章數。
3.最後在 Web App 上利用 Visualization 顯示徽章排行榜!

再來是 Code 的部份:
1.SpreadSheet (Google 試算表) 裡的 Google app script :

// Table public id
var tableID = "3707276";
// Account
var email = UserProperties.getProperty('email');
var password = UserProperties.getProperty('password');
if (email === null || password === null) {
  email = Browser.inputBox('Enter email');
  password = Browser.inputBox('Enter password');
  UserProperties.setProperty('email',email);
  UserProperties.setProperty('password', password);
};
// Google Auth Token
var authToken = getGAauthenticationToken(email,password);
// Google Auth
function getGAauthenticationToken(email, password) {
  password = encodeURIComponent(password);
  var response = UrlFetchApp.fetch("https://www.google.com/accounts/ClientLogin", {
    method: "post",
    payload: "accountType=GOOGLE&Email=" + email + "&Passwd=" + password + "&service=fusiontables&Source=js-ninja"
  });
  var responseStr = response.getContentText();
  responseStr = responseStr.slice(responseStr.search("Auth=") + 5, responseStr.length);
  responseStr = responseStr.replace(/\n/g, "");
  return responseStr;
};
// 使用者名單
function JS_List() {
  for (var i=1 ; i <= 24 ; i++) {
    var URL = "http://ithelp.ithome.com.tw/js-ninja/hero-list/page/" + i + "/level/1";
    var response = UrlFetchApp.fetch(URL);
    var code = response.getResponseCode();
    if (code == 200) {
      var html = response.getContentText("UTF-8");
      var list = html.split('hero_list_bg');
      for (var j=1; j < list.length; j++) {
        var id = substr(0,list[j],'id=','"');
        var name = substr(0,list[j],'left">',' ');
        // 新增
        var SQL = "INSERT INTO " + tableID + " ('ID','Name') VALUES ('" + id + "','" + name + "');";
        UrlFetchApp.fetch("http://www.google.com/fusiontables/api/query", {
          method: "POST",
          payload : {"sql" : SQL},
          headers: {"Authorization": "GoogleLogin auth=" + authToken,}
        });
      }
    }
  }
}
// 計算徽章數
function Badge() {
  // 查詢
  var SQL = "SELECT rowid,ID FROM " + tableID;
  var SQLresponse = UrlFetchApp.fetch("http://www.google.com/fusiontables/api/query", {
    method: "POST",
    payload : {"sql" : SQL},
    headers: {"Authorization": "GoogleLogin auth=" + authToken,}
  });
  var tableData = SQLresponse.getContentText();
  var resParse  = Utilities.parseCsv(tableData);
   
  for (var i=1; i < resParse.length; i++) {
    var id = resParse[i][1];
    var URL = "http://ithelp.ithome.com.tw/js-ninja/sticker-badges/user/" + id;
    var response = UrlFetchApp.fetch(URL);
    var code = response.getResponseCode();
    if (code == 200) {
      var html = response.getContentText("UTF-8");
      var total = html.split('icon1').length-1;
      // 更新
      var SQL = "UPDATE " + tableID + " SET 'Total' = '" + total + "' WHERE rowid = '"+ resParse[i][0] +"';";
      UrlFetchApp.fetch("http://www.google.com/fusiontables/api/query", {
        method: "POST",
        payload : {"sql" : SQL},
        headers: {"Authorization": "GoogleLogin auth=" + authToken,}
      });
    }
  }
};
// 字串擷取
function substr(index,data,star,end){
  var x = data.indexOf(star,index);
  var y = data.indexOf(end,x);
  return data.substring(x+star.length,y);
};

2.接著將 FusionTables 設為公開!(點連結進去可以看到原始資料)
3.最後用 Visualization 從 FusionTables 抓資料!(完整範例

google.load('visualization', '1');
function drawTable() {
 // Construct query
 var query = "SELECT 'Name' as '暱稱', 'Total' as '徽章數' FROM 3707276";
 var team = document.getElementById('team').value;
 if (team) {
  query += " WHERE 'Total' >= '" + team + "'";
 }
 var queryText = encodeURIComponent(query);
 var gvizQuery = new google.visualization.Query(
  'http://www.google.com/fusiontables/gvizdata?tq='  + queryText);
 // Send query and draw table with data in response
 gvizQuery.send(function(response) {
  var numRows = response.getDataTable().getNumberOfRows();
  var numCols = response.getDataTable().getNumberOfColumns();
 
  var ftdata = [''];
  for (var i = 0; i < numCols; i++) {
   var columnTitle = response.getDataTable().getColumnLabel(i);
   ftdata.push('');
  }
  ftdata.push('');
  for (var i = 0; i < numRows; i++) {
   ftdata.push('');
   for(var j = 0; j < numCols; j++) {
    var rowValue = response.getDataTable().getValue(i, j);
    ftdata.push('');
   }
   ftdata.push('');
  }
  ftdata.push('<table><thead>
<tr><th>' + columnTitle + '</th></tr>
</thead><tbody>
<tr><td>' + rowValue + '</td></tr>
</tbody></table>
');
  document.getElementById('ft-data').innerHTML = ftdata.join('');
 });
}
google.setOnLoadCallback(drawTable);

其實這篇和 YQL 都可以視為 Javascript 跨網域的解決方案,
但 YQL 比較適合單一網頁,例如用 jQuery + YQL 示範的文章清單
而這篇的應用對於抓取大量網頁非常有優勢,
根據我的經驗,在限制的執行時間內 Google 能抓取的網頁數量約為 150 個左右!
超過 150 頁的話,就得用其他技巧讓它分批執行了。


尚未有邦友留言

立即登入留言