First, use the Tsinghua University PyPI mirror to speed up the required pip downloads:
pip install jieba -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install wordcloud -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install matplotlib -i https://pypi.tuna.tsinghua.edu.cn/simple
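If you install from this mirror often, pip can also be pointed at it permanently instead of passing -i each time. This is a convenience step, not required by the script below:

pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple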
Result: (generated word cloud image)
Code:
import jieba
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Read the government work report text
def read_text(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        text = file.read()
    return text

# Tokenize and count word frequencies
def count_words(text):
    # Tokenize with jieba
    words = jieba.lcut(text)
    # Drop single-character tokens
    words = [word for word in words if len(word) > 1]
    # Count frequencies
    word_counts = Counter(words)
    return word_counts

# Generate the word cloud
def generate_wordcloud(word_counts):
    # Convert the Counter to a plain word -> frequency dict
    word_freq = {word: freq for word, freq in word_counts.items()}
    # Create the word cloud object
    wordcloud = WordCloud(
        font_path='simhei.ttf',   # path to a Chinese font (SimHei here)
        width=800,
        height=600,
        background_color='white', # background color
        max_words=100,            # maximum number of words shown
        max_font_size=100,        # maximum font size
    ).generate_from_frequencies(word_freq)
    return wordcloud

# Display the word cloud
def display_wordcloud(wordcloud):
    plt.figure(figsize=(10, 8))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')  # hide the axes
    plt.show()

# Main program
if __name__ == "__main__":
    # Path to the government work report text file
    report_path = 'report.txt'
    # Read the text
    report_text = read_text(report_path)
    # Count word frequencies
    word_counts = count_words(report_text)
    # Print the 10 most frequent words
    print("Top word frequencies:")
    for word, freq in word_counts.most_common(10):
        print(f"{word}: {freq}")
    # Generate the word cloud
    wordcloud = generate_wordcloud(word_counts)
    # Display it
    display_wordcloud(wordcloud)
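
Two common refinements are worth sketching: dropping high-frequency function words (stopwords) before counting, and saving the rendered cloud to disk with WordCloud's to_file method. The snippet below is a minimal sketch that reuses report_text and generate_wordcloud from the script above; the stopword set and output filename are illustrative assumptions, not part of the original script.

import jieba
from collections import Counter

STOPWORDS = {'我们', '一个', '以及'}  # illustrative stopword set; extend for real use

def count_words_filtered(text, stopwords=STOPWORDS):
    # Like count_words above, but also drops words in the stopword set
    words = [w for w in jieba.lcut(text) if len(w) > 1 and w not in stopwords]
    return Counter(words)

# Regenerate from the filtered counts and write a PNG alongside the script;
# 'report_wordcloud.png' is an assumed output path
wordcloud = generate_wordcloud(count_words_filtered(report_text))
wordcloud.to_file('report_wordcloud.png')

Filtering before counting (rather than relying only on max_words) keeps the top-10 printout meaningful as well, since otherwise it tends to be dominated by generic words.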