library(jiebaR)
library(wordcloud2)
# Read the input file: records separated by newlines, UTF-8 encoded;
# what = '' makes scan() return each record as a character string.
file <- scan('ciyun.txt', sep = '\n', what = '', encoding = "UTF-8")
txtList <- lapply(file, strsplit, "\\s+")  # tokenize each line on whitespace
txtChar <- tolower(unlist(txtList))
txtChar <- txtChar[nchar(txtChar) > 2]  # drop words of length <= 2 characters
txtChar <- table(txtChar)  # count word frequencies
# grepl is like grep but returns a logical vector (does each name match?)
txtChar <- txtChar[!grepl('^[0-9-]+$', names(txtChar), perl = TRUE)]  # drop pure numbers
# BUG FIX: the original pattern '^and|the$' matched any word *starting with*
# "and" or *ending with* "the" (e.g. "android", "breathe"), because '|' binds
# looser than the anchors. Grouping restricts it to the exact stopwords.
txtChar <- txtChar[!grepl('^(and|the)$', names(txtChar), perl = TRUE)]  # drop "and"/"the"
# Sort by frequency (descending) and keep at most the 100 most frequent words.
# head() avoids the NA entries that sort(...)[1:100] produces when fewer than
# 100 distinct words survive the filters above.
txtChar <- head(sort(txtChar, decreasing = TRUE), 100)
data <- data.frame(txtChar)
data <- dplyr::filter(data, Freq >= 2)  # keep words occurring at least twice
# Strip a '.pdf' substring from the word labels before plotting
data$txtChar <- sub('\\.pdf', '', data$txtChar)
# Render the word cloud (star-shaped layout)
wordcloud2(data, size = 1, shape = 'star')
# Reference:
# https://zhuanlan.zhihu.com/p/22601260