# Required packages (install before running):
#   pip install -i https://mirrors.aliyun.com/pypi/simple/ jieba wordcloud matplotlib
#   pip install wordcloud matplotlib
import jieba
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt
def read_file(file_path):
    """Return the full contents of a UTF-8 encoded text file.

    Args:
        file_path: Path to the file to read.

    Returns:
        The file's contents as a single string.
    """
    with open(file_path, 'r', encoding='utf-8') as fh:
        return fh.read()
def extract_words(text):
    """Segment Chinese text into words and drop uninteresting tokens.

    Uses jieba to tokenize, then discards tokens that are purely
    numeric or, after stripping whitespace, only a single character
    (a lightweight stand-in for stop-word filtering).

    Args:
        text: Raw text to segment.

    Returns:
        A list of the remaining word tokens, in original order.
    """
    tokens = jieba.lcut(text)
    kept = []
    for token in tokens:
        if token.isdigit():
            continue
        if len(token.strip()) <= 1:
            continue
        kept.append(token)
    return kept
def count_word_frequency(words):
    """Tally how many times each word occurs.

    Args:
        words: Iterable of word strings.

    Returns:
        A collections.Counter mapping each word to its count.
    """
    frequencies = Counter()
    frequencies.update(words)
    return frequencies
def top_n_words(word_counts, n=10):
    """Return the *n* most frequent words and their counts.

    Args:
        word_counts: A collections.Counter of word frequencies.
        n: How many of the top entries to return (default 10).

    Returns:
        List of (word, count) pairs, most frequent first.
    """
    top_entries = word_counts.most_common(n)
    return top_entries
def generate_wordcloud(word_frequencies, width=800, height=400, font_path='simhei.ttf'):
    """Render a word cloud from word frequencies and display it.

    Args:
        word_frequencies: Mapping of word -> frequency (e.g. a dict or
            collections.Counter) passed to WordCloud.
        width: Width of the generated image in pixels (default 800).
        height: Height of the generated image in pixels (default 400).
        font_path: Path to a TrueType font that supports the text's
            characters; defaults to 'simhei.ttf' (a common Chinese
            font), kept for backward compatibility.

    Side effects:
        Opens a matplotlib window showing the word cloud (plt.show()).
    """
    wordcloud = WordCloud(width=width, height=height, background_color='white',
                          font_path=font_path).generate_from_frequencies(word_frequencies)
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')  # hide the axes — this is an image, not a chart
    plt.show()
# 主程序
if __name__ == "__main__":
file_path = 'E:\\我的作品\\djangoWeb\\pythonProject_Web_text\\message.txt' # 替换为你的文本文件路径
text = read_file(file_path)
words = extract_words(text)
word_counts = count_word_frequency(words)
top_10_words = top_n_words(word_counts)
# 将top 10的词频转换为字典,以便传递给WordCloud
top_10_dict = dict(top_10_words)
# 生成并显示词云
generate_wordcloud(top_10_dict, width=1200, height=600) # 可以根据需要调整宽度和高度