# DAY 27 — AI & Data

## Day 27: 猜字AI加強版 -- Bigram Guesser (Enhanced Hangman AI)

from operator import itemgetter

# key: a letter; value: Counter of the letters observed immediately after it
bigram_counts = defaultdict(Counter)

# key: a letter; value: all 26 candidate next letters, ranked by their
# interpolated probability of following that letter (best first)
bigram_inter = defaultdict(list)

# Build the bigram counts from the training words.
for word in training_set:
    word = '$' + word  # '$' marks the start of a word so the first letter has a left context
    for i, letter in enumerate(word[:-1]):
        # Direct increment — equivalent to (but simpler than) building a
        # one-entry Counter and merging it with `+=`.
        bigram_counts[letter][word[i + 1]] += 1

set_lambda = 0.75  # interpolation weight on the bigram term

# Bigram interpolated model:
#   p = lambda * p(w_i | w_{i-1}) + (1 - lambda) * p(w_i)
# where p(w_i | w_{i-1}) = count(w_{i-1} w_i) / count(w_{i-1})
# and   p(w_i)           = count(w_i) / sum_over_all_letters(count)
sigma_count_wi = sum(unigram_counts.values())  # loop-invariant: total unigram mass, computed once
for key, followers in bigram_counts.items():
    count_wi1 = sum(followers.values())  # count(w_{i-1})
    # Interpolated probability for each of the 26 lowercase letters.
    prob_of_letter = {}
    for code in range(ord('a'), ord('z') + 1):
        letter = chr(code)
        p_wi_wi1 = followers[letter] / count_wi1            # p(w_i | w_{i-1})
        p_wi = unigram_counts[letter] / sigma_count_wi      # p(w_i)
        prob_of_letter[letter] = set_lambda * p_wi_wi1 + (1 - set_lambda) * p_wi
    # Rank candidates by probability — sort once, instead of re-sorting the
    # whole dict 26 times to pull out one rank per pass.
    ranked = sorted(prob_of_letter.items(), key=itemgetter(1), reverse=True)
    bigram_inter[key] = [letter for letter, _ in ranked]


def bigram_guesser(mask, guessed, counts=bigram_counts):  # add extra default arguments if needed
    """Guess the next letter for hangman using the interpolated bigram model.

    Args:
        mask: list of revealed letters, with '_' for each unrevealed position.
        guessed: iterable of letters already guessed (right or wrong).
        counts: bigram count table (kept for interface compatibility).

    Returns:
        The unguessed letter most likely to follow the letter immediately
        before the leftmost blank, according to the interpolated model.
    """
    mask = ['$'] + mask  # start marker so the first blank always has a left context
    # w_(i-1): the letter immediately before the leftmost '_'.  Everything
    # before the first blank is by definition revealed, so it is never '_'.
    w_i_1 = ""
    for i, ch in enumerate(mask):
        if ch == '_':
            w_i_1 = mask[i - 1]
            break
    candidates = bigram_inter[w_i_1].copy()
    # Drop letters that have already been guessed.
    for letter in guessed:
        if letter in candidates:
            candidates.remove(letter)
    return candidates[0]

#hangman(np.random.choice(test_set), bigram_guesser, 26, True)

# Evaluate the guesser over the whole test set; judging by the print label
# below, test_guesser presumably returns the average number of wrong guesses
# (lower is better) — TODO confirm against test_guesser's definition.
result = test_guesser(bigram_guesser)
print()
print("平均猜錯次數：", result)