您的訂閱是我製作影片的動力
訂閱點這裡~
影片程式碼 (延續昨天)
# Cluster profile, numeric part (original note: "mean + mode vs. taking each
# cluster's weight into account").
# From columns 1-4 and 6 of `final` (presumably the four iris measurements
# plus the cluster label 分群 -- confirm against where `final` is built),
# average every measurement within each cluster and store the sum of the four
# averages as 總值 (total).
final_mean <- final[, c(1:4, 6)] %>%
  group_by(分群) %>%
  summarise(
    col1 = mean(Sepal.Length),
    col2 = mean(Sepal.Width),
    col3 = mean(Petal.Length),
    col4 = mean(Petal.Width),
    # Later summaries can refer to earlier ones, so the total is built here.
    總值 = col1 + col2 + col3 + col4
  )
# Cluster profile, categorical part: the most frequent Species per cluster.
# Counts rows for every (分群, Species) pair; 眾數 holds that count.
# The result has columns 分群, Species, 眾數 and exactly one row per cluster.
final_mode <- final[, c(5:6)] %>%
  group_by(分群, Species) %>%
  summarise(眾數 = n()) %>%
  # summarise() peels off the last grouping level, so the data is still
  # grouped by 分群 here and slice() works cluster by cluster.
  # which.max() returns the first maximum, so a tie between two species can
  # never produce two rows for one cluster (top_n(1) kept every tied row,
  # which broke the row-by-row pairing with final_mean further down).
  slice(which.max(眾數))
# Lookup table used for imputation: one row per cluster holding the four
# feature means (col1..col4), their total (總值) and the modal Species (col5).
# The two profiles are matched on the cluster key 分群 rather than by row
# position, so a different row order or a duplicated cluster in final_mode
# cannot attach a species to the wrong cluster.
final_exp <- local({
  cluster_mode <- as.data.frame(final_mode)[, c("分群", "Species")]
  # Keep a single modal species per cluster (first one wins on ties).
  cluster_mode <- cluster_mode[!duplicated(cluster_mode$分群), , drop = FALSE]
  names(cluster_mode)[names(cluster_mode) == "Species"] <- "col5"
  # Returned as a plain data.frame, matching what cbind() produced before.
  as.data.frame(left_join(final_mean, cluster_mode, by = "分群"))
})
# Assign every row of iris_na to the cluster whose profile it is closest to
# (original note: "fill by smallest total gap vs. each column minus each
# cluster's value; take the closest one and attach its weights").
#
# Output columns, in order (later code relies on these positions):
#   1-4  the four measurement columns of iris_na
#   5    總值   row total of the observed (non-NA) measurements
#   6-9  c1差..c4差  absolute gap between the row total and cluster 1..4
#   10   最小   smallest of the four gaps
#   11   分群   cluster (1..4) with the smallest gap; the lowest index wins ties
# All numeric columns are rounded to one decimal at the end.
#
# A missing cell contributes nothing to the row total, so each cluster's
# reference total is built from the same observed features only. Comparing
# against the full four-feature total would make every incomplete row look
# "small" and push it toward the cluster with the smallest total.
iris_na_fill <- local({
  measures <- as.matrix(iris_na[, 1:4])

  # Rows of final_exp for clusters 1..4, in that order.
  cluster_rows <- match(1:4, final_exp$分群)
  stopifnot(!anyNA(cluster_rows))
  centers <- as.matrix(
    final_exp[cluster_rows, c("col1", "col2", "col3", "col4")]
  )

  # n x 4 matrix: for row i and cluster k, the sum of cluster k's means over
  # the features that row i actually has.
  observed <- !is.na(measures)
  cluster_total <- unname((observed * 1) %*% t(centers))

  iris_na[, 1:4] %>%
    mutate(
      總值 = rowSums(measures, na.rm = TRUE),
      c1差 = abs(總值 - cluster_total[, 1]),
      c2差 = abs(總值 - cluster_total[, 2]),
      c3差 = abs(總值 - cluster_total[, 3]),
      c4差 = abs(總值 - cluster_total[, 4]),
      最小 = pmin(c1差, c2差, c3差, c4差),
      # pmin() returns one of its inputs unchanged, so == is exact here.
      分群 = ifelse(最小 == c1差, 1,
        ifelse(最小 == c2差, 2,
          ifelse(最小 == c3差, 3, 4)
        )
      )
    ) %>%
    mutate_if(is.numeric, round, digits = 1)
})
# Attach the assigned cluster's profile to the original (unrounded) iris_na
# rows. The result is iris_na followed by 分群, col1..col4 and col5.
iris_na_fill <- local({
  # Select by name: positional indexes silently pick the wrong columns as
  # soon as a column is added or removed upstream.
  profile_cols <- c("分群", "col1", "col2", "col3", "col4", "col5")
  matched <- left_join(iris_na_fill, final_exp, by = "分群")
  # A duplicated cluster key in final_exp would fan out the join and
  # misalign the cbind() below, so fail loudly instead.
  stopifnot(nrow(matched) == nrow(iris_na))
  cbind(iris_na, matched[, profile_cols])
})
# Fill each missing cell with the value from the row's assigned cluster:
# cluster means (col1..col4) for the measurements, modal species (col5) for
# Species. Observed values are kept unchanged.
iris_na_fill <- iris_na_fill %>%
  mutate(
    v1 = ifelse(is.na(Sepal.Length), col1, Sepal.Length),
    v2 = ifelse(is.na(Sepal.Width), col2, Sepal.Width),
    v3 = ifelse(is.na(Petal.Length), col3, Petal.Length),
    v4 = ifelse(is.na(Petal.Width), col4, Petal.Width),
    # ifelse() drops factor attributes and would return the integer level
    # codes; convert to character first so v5 holds the species names.
    v5 = ifelse(is.na(Species), as.character(col5), as.character(Species))
  )
# Final imputed data set: the five filled columns only.
done <- iris_na_fill[c("v1", "v2", "v3", "v4", "v5")]
若內容有誤，還請留言指正，謝謝您的指教。