交叉驗證不同方法組合的模型準確率
1.1 參數說明
1.2 程式碼
選擇模型組合方法
2.1 交叉比對結果
2.2 結論
交叉驗證不同方法組合的模型準確率。
1.1 參數說明
1.2 程式碼
# Build the list of model combinations to evaluate. Each combination is later
# cross-compared under 2 * 2 * 2 = 8 method settings.
k <- as.numeric(seq_len(7))

# NOTE(review): BitMatrix() is defined elsewhere; presumably it returns one
# 0/1 row per subset of length(k) items -- confirm.
selectMatrix <- BitMatrix(length(k))

# For every row keep the entries of k whose bit equals 1, then discard the
# first resulting entry.
# NOTE(review): dropping entry 1 looks like it removes the empty (all-zero)
# combination -- confirm that BitMatrix() emits that row first.
model_count <- apply(
  selectMatrix,
  1,
  function(bits) {
    k[which(bits == 1)]
  }
)[-1]
# Initialise the result accumulators. Every vector starts as NULL and receives
# one element per evaluated configuration, written at position count2.

# Configuration identifiers: model-combination label and the three method
# switches.
mm_name <- NULL
s1list <- s2list <- s3list <- NULL

# Row counts of the three evaluation data sets.
in800 <- out800 <- test <- NULL

# Mean-based metrics computed for each configuration.
in800_acc <- out800_acc <- test_acc <- test_o800_acc <- NULL
in800_confus <- test_confus <- NULL

# Next write position in the accumulators (1-based).
count2 <- 1
# Cross-comparison: for every model combination evaluate each of the
# 2 x 2 x 2 method settings and append one set of metrics per setting to the
# accumulators (mm_name, s1list, ..., test_o800_acc) at index count2.
#
#   s1  statistic passed to get_new_model() when combining the models:
#         1 = "acc"        (per-word accuracy)
#         2 = "mean_prob"  (per-word mean probability)
#   s2  how the threshold flag `acc_null` is obtained:
#         1 = voting              -> get_min01() without new_data / new_stat
#         2 = after combination   -> get_min01() with the combined model data
#   s3  statistic passed to get_min01() for the threshold:
#         1 = "min_prob"   (minimum)
#         2 = "mean_prob"  (mean probability)
#
# The original code spelled out the four (s1, s2) branches separately with
# identical bodies; they are folded into one parameterised body here. The call
# order (in800, out800, test) and all recorded values are unchanged.

# Labels of the candidate models, indexed by the numbers in model_count.
model_labels <- c("1", "2", "3", "ex3", "ex4", "ex5", "ex6")

# seq_along() keeps the loop from running over c(1, 0) when model_count is
# empty.
for (count in seq_along(model_count)) {
  # Not read anywhere in this section; kept in case later code relies on it.
  nmodel <- length(model_count[[count]])
  model_name <- model_labels[model_count[[count]]]
  print(model_name)

  for (s1 in 1:2) {
    combine_stat <- if (s1 == 1) "acc" else "mean_prob"

    # get_new_model() results are indexed as [[1]] = data and [[2]] = the
    # object later handed to get_min01() as new_stat.
    new_model_i800 <- get_new_model(namesmodel = model_name,
                                    stat = combine_stat,
                                    dataset = "offical_in800")
    new_model_o800 <- get_new_model(namesmodel = model_name,
                                    stat = combine_stat,
                                    dataset = "offical_noin800")
    new_model_test <- get_new_model(namesmodel = model_name,
                                    stat = combine_stat,
                                    dataset = "test_data")

    new_model_i800_new <- new_model_i800[[2]]
    new_model_o800_new <- new_model_o800[[2]]
    new_model_test_new <- new_model_test[[2]]

    new_model_i800 <- new_model_i800[[1]]
    new_model_o800 <- new_model_o800[[1]]
    new_model_test <- new_model_test[[1]]

    for (s2 in 1:2) {
      for (s3 in 1:2) {
        threshold_stat <- if (s3 == 1) "min_prob" else "mean_prob"

        if (s2 == 1) {
          # Voting: the optional arguments are deliberately omitted (not
          # passed as NULL) so get_min01() sees exactly the original call.
          new_model_i800$acc_null <- get_min01(namesmodel = model_name,
                                               stat = threshold_stat,
                                               dataset = "offical_in800")
          new_model_o800$acc_null <- get_min01(namesmodel = model_name,
                                               stat = threshold_stat,
                                               dataset = "offical_noin800")
          new_model_test$acc_null <- get_min01(namesmodel = model_name,
                                               stat = threshold_stat,
                                               dataset = "test_data")
        } else {
          # Judge on the combined model.
          # NOTE(review): new_data already carries the acc_null column written
          # by the previous iteration -- confirm get_min01() ignores it.
          new_model_i800$acc_null <- get_min01(namesmodel = model_name,
                                               stat = threshold_stat,
                                               dataset = "offical_in800",
                                               new_data = new_model_i800,
                                               new_stat = new_model_i800_new)
          new_model_o800$acc_null <- get_min01(namesmodel = model_name,
                                               stat = threshold_stat,
                                               dataset = "offical_noin800",
                                               new_data = new_model_o800,
                                               new_stat = new_model_o800_new)
          new_model_test$acc_null <- get_min01(namesmodel = model_name,
                                               stat = threshold_stat,
                                               dataset = "test_data",
                                               new_data = new_model_test,
                                               new_stat = new_model_test_new)
        }

        # Configuration identifiers.
        mm_name[count2] <- paste(model_name, collapse = ",")
        s1list[count2] <- s1
        s2list[count2] <- s2
        s3list[count2] <- s3

        # Number of rows in each data set.
        in800[count2] <- nrow(new_model_i800)
        out800[count2] <- nrow(new_model_o800)
        test[count2] <- nrow(new_model_test)

        # Metrics: mean of `acc` / `acc_null` over the indicated rows.
        in800_acc[count2] <- mean(new_model_i800$acc)
        out800_acc[count2] <- mean(new_model_o800$acc_null)
        test_acc[count2] <- mean(
          new_model_test$acc[new_model_test$origin_word != "isnull"]
        )
        test_o800_acc[count2] <- mean(
          new_model_test$acc_null[new_model_test$origin_word == "isnull"]
        )
        # Mean acc_null restricted to rows where acc == 1.
        in800_confus[count2] <- mean(
          new_model_i800$acc_null[new_model_i800$acc == 1]
        )
        test_confus[count2] <- mean(
          new_model_test$acc_null[new_model_test$acc == 1]
        )

        count2 <- count2 + 1
      }
    }
  }
}
# Collect the accumulators into one table (one row per evaluated
# configuration) and export it as CSV.
result <- data.frame(
  mm_name = mm_name,            # model combination label
  s1list = s1list,              # method switch s1
  s2list = s2list,              # method switch s2
  s3list = s3list,              # method switch s3
  in800 = in800,                # row count of the in800 data
  in800_acc = in800_acc,
  in800_confus = in800_confus,
  out800 = out800,              # row count of the out800 data
  out800_acc = out800_acc,
  test = test,                  # row count of the test data
  test_acc = test_acc,
  test_confus = test_confus,
  test_null = test_o800_acc     # exported under the column name test_null
)

# Spell out FALSE: the alias F is an ordinary variable and can be reassigned.
write.csv(
  result,
  file = "C:/Users/wooden/Desktop/dl/model/model_statement.csv",
  row.names = FALSE
)
選擇模型組合方法
2.1 交叉比對結果
2.2 考量
辨識時間
辨識準確度
2.3 結論
選定模型組合方法
選定組合哪幾個模型
讓我們繼續看下去...