要统计工作报告热词频率并生成词云,我按以下步骤操作:
- 读取工作报告文本文件。
- 对文本进行分词处理。
- 统计每个词的出现频率。
- 生成词云图。
下面是实现该功能的 Python 代码:
import jieba
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt
def read_text(file_path):
    """Read the entire contents of a UTF-8 text file.

    Returns the file's text as a str, or None when the file does not
    exist or any other read error occurs; in both error cases a message
    is printed to stdout.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as fh:
            return fh.read()
    except FileNotFoundError:
        print(f"错误:未找到文件 {file_path}")
    except Exception as e:
        # Best-effort boundary: report anything unexpected, don't crash.
        print(f"错误:读取文件时发生未知错误 {e}")
    return None
def tokenize_text(text):
    """Segment text into a list of word tokens via jieba's cut-to-list API."""
    return jieba.lcut(text)
def count_word_frequency(words, min_length=1, stop_words=None):
    """Count how often each word occurs.

    Backward compatible with the original signature: called with just
    `words`, nothing is filtered out.

    Args:
        words: iterable of word strings (e.g. jieba tokens).
        min_length: keep only words at least this many characters long.
            Default 1 keeps everything (useful to raise to 2 to drop
            single-character noise tokens in Chinese text).
        stop_words: optional collection of words to exclude from the count.

    Returns:
        collections.Counter mapping word -> occurrence count.
    """
    excluded = set(stop_words) if stop_words else set()
    return Counter(
        w for w in words if len(w) >= min_length and w not in excluded
    )
def generate_wordcloud(word_count, font_path='simhei.ttf', figsize=(8, 8)):
    """Render a word cloud from word frequencies and display it.

    Backward compatible: the new parameters default to the previously
    hard-coded values.

    Args:
        word_count: mapping of word -> frequency (e.g. a Counter).
        font_path: path to a TTF font with CJK glyphs (default
            'simhei.ttf'; matplotlib's default font cannot render
            Chinese, so a CJK-capable font file must exist at this path).
        figsize: matplotlib figure size in inches.
    """
    wordcloud = WordCloud(
        font_path=font_path,
        background_color='white',
    ).generate_from_frequencies(word_count)
    plt.figure(figsize=figsize)
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()
if __name__ == "__main__":
    # Pipeline: read the report -> tokenize -> count -> render word cloud.
    report_path = 'government_report.txt'
    report_text = read_text(report_path)
    if report_text:
        tokens = tokenize_text(report_text)
        frequencies = count_word_frequency(tokens)
        generate_wordcloud(frequencies)