import re

import jieba
import matplotlib.pyplot as plt
import numpy
import pandas as pd
from wordcloud import WordCloud

# Review files are split by rating type; the suffixes match the percent_type
# query parameter used when the comments were scraped.
percent_types = ['', '&percent_type=h', '&percent_type=m', '&percent_type=l']

for j in range(4):
    # Read the reviews for this rating type and join them into one string.
    with open('影评{}.txt'.format(percent_types[j]), "r", encoding='utf-8') as f:
        data = f.readlines()
    col = ''
    for k in data:
        col = col + k.strip('\n')

    # Keep only Chinese characters, then segment the text with jieba.
    pattern = re.compile(r'[\u4e00-\u9fa5]+')
    filterdata = re.findall(pattern, col)
    cleanedcol = ''.join(filterdata)
    segment = jieba.lcut(cleanedcol)

    # Build a word table, drop stop words, and count how often each word appears.
    words_df = pd.DataFrame({'segment': segment})
    stopwords = pd.read_csv("stopwords.txt", index_col=False, quoting=3,
                            sep="\t", names=['stopword'], encoding='gbk')
    words_df = words_df[~words_df.segment.isin(stopwords.stopword)]
    words_stat = words_df.groupby(by=['segment'])['segment'].agg([("计数", numpy.size)])
    words_stat = words_stat.reset_index().sort_values(by=["计数"], ascending=False)

    # Feed the 1000 most frequent words to the word cloud and save it as a PNG.
    word_fre = {x[0]: x[1] for x in words_stat.head(1000).values}
    wc = WordCloud(font_path="simhei.ttf", background_color="white",
                   width=2000, height=1000)
    wc.fit_words(word_fre)
    wc.to_file("F://PycharmDemo/Project/DouBan/temp{}.png".format(percent_types[j]))