# Load packages ----
# Install only when stringr is missing, so re-running the script does not
# reinstall the whole tidyverse every time.
if (!requireNamespace("stringr", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(stringr)

# Data source: the voting-topic text data produced by the earlier project.
# wdpath keeps its trailing "/" so any other code that appends to it still works.
wdpath <- paste0(getwd(), "/Documents/GitHub/R_DayOfDataEnginner-2018/")

# [Read] Load the page list (second-level locations derived from the
# first-level data) into the dfl data frame for later use.
# stringsAsFactors = FALSE keeps the text columns as character on R < 4.0 too.
dfl <- read.csv(
  paste0(wdpath, "dscsv/pagelist.csv"),
  stringsAsFactors = FALSE
)
View(head(dfl$title, 10))
# Text matching ----
# Detect matches: search the first ten titles for the pattern "國家".
title_head <- head(dfl$title, 10)
str_detect(title_head, "國家")  # TRUE/FALSE per title
str_which(title_head, "國家")   # indices of the titles that match
str_locate(title_head, "國家")  # start/end position of the first match
# Filtering text content ----
# Subset strings: pull pieces out of the first ten titles.
title_head <- head(dfl$title, 10)
str_sub(title_head, 1, 2)             # characters 1 to 2 of each title
str_subset(title_head, "國家")        # only the titles containing the pattern
str_extract(title_head, "國家")       # first match per title, NA when absent
str_extract_all(title_head, "國家")   # list with every match per title
str_match(title_head, "國家")         # matrix form of the first match
str_match_all(title_head, "國家")     # list of matrices, one per title
# String length ----
# Manage length: the same measurement on the raw column and on an explicit
# character conversion of it.
str_length(head(dfl$title, 10))
str_length(head(as.character(dfl$title), 10))
# Text formatting ----
# Work on a character copy of the first ten titles so every call below sees
# the same input type.
title_head <- head(as.character(dfl$title), 10)

# Pad to a width of 60: default side/pad first, then centred with "~".
str_pad(title_head, 60)
str_pad(title_head, 60, side = "both", pad = "~")

# Truncate to a width of 6: default ellipsis first, then spelled out.
str_trunc(title_head, 6)
str_trunc(title_head, 6, side = "right", ellipsis = "...")

# Strip whitespace from both ends.
str_trim(title_head, side = "both")

# Truncate using a blank as the ellipsis, then trim that blank away again.
str_trim(
  str_trunc(title_head, 6, side = "right", ellipsis = " ")
)
# Character replacement ----
# Mutate strings.
# `str_sub<-` needs an assignable object on its left-hand side; head(...) is a
# call result (there is no `head<-`), so the replacement is applied to a copy
# of the first ten titles instead.
title_head <- head(as.character(dfl$title), 10)
str_sub(title_head, 2, 4) <- "鐵人賽"

# Overwrite characters 2 to 4 of every title in the data frame itself.
str_sub(dfl$title, 2, 4) <- "鐵人賽"

# Replace the first occurrence of "國家" in each of the first ten titles.
str_replace(head(dfl$title, 10), "國家", "家國")
# Splitting and joining strings ----
# Join and split: cut two slices out of each of the first ten titles and
# combine them.
title_head <- head(dfl$title, 10)
slice_a <- str_sub(title_head, 2, 4)
slice_b <- str_sub(title_head, 9, 10)

str_c(slice_a, slice_b)                  # element-wise join, one per title
str_c(slice_a, slice_b, collapse = "")   # then collapse into a single string
str_dup(slice_a, 2)                      # repeat each slice twice

# Base R counterpart of collapsing a vector into one string.
paste0(letters, collapse = "")
# Sorting ----
# Main difference between order and sort:
#   Order: returns the index values of the sorted result.
#   Sort:  returns the actual values of the sorted result.
## Order strings
str_order(letters[1:10], decreasing = TRUE)
str_sort(letters[1:10], decreasing = TRUE)
# Helpers ----
first_titles <- head(dfl$title, 10)
all_titles <- as.character(dfl$title)

# Convert the first ten titles, declaring the source encoding each time.
str_conv(first_titles, "UTF-8")
str_conv(first_titles, "BIG-5")

# Highlight the pattern in every title, then wrap the titles at width 20.
str_view(all_titles, "國家")
str_wrap(all_titles, 20)

# Open the data frame in the viewer.
View(dfl)
# NOTE(review): whether View() can display a str_view() result presumably
# depends on the installed stringr version - confirm.
View(str_view(all_titles, "國家"))