from collections import Counter

import jieba
import matplotlib.pyplot as plt
from pylab import mpl
# Word-frequency script: segment the text in all.txt with jieba, count every
# token longer than one character, and plot the counts of the most frequent
# words with matplotlib.

# Work around matplotlib failing to display Chinese text in figures by
# selecting the SimHei font for the sans-serif family.
mpl.rcParams['font.sans-serif'] = ['SimHei']

# Upper bound on how many of the most frequent words are plotted.
TOP_N = 50

with open("all.txt", encoding="utf-8") as f:
    contents = f.read()

words = jieba.lcut(contents)

# Number of occurrences of each word; single-character tokens are skipped.
counts = Counter(word for word in words if len(word) != 1)

# All (word, count) pairs ordered from most to least frequent; words with
# equal counts keep the order in which they were first seen.
items = counts.most_common()

# Slice instead of indexing 0..TOP_N-1 so that a text with fewer than TOP_N
# distinct words is plotted as-is rather than raising IndexError.
top_items = items[:TOP_N]
names = [word for word, _ in top_items]
dicts = [count for _, count in top_items]

# Show the result with matplotlib.
plt.figure()
plt.plot(names, dicts)
plt.xlabel('前50的中文词组')
plt.ylabel('中文词组出现频率')
plt.title("《我是传奇BOSS》文章中出现频率前50的中文词组")
plt.show()