simple python wordcloud from markdown

最新推荐文章于 2025-06-12 10:41:46 发布
kriss-spy
最新推荐文章于 2025-06-12 10:41:46 发布
阅读量129
点赞数 2
分类专栏：小白编程日志文章标签：学习 python
本文链接：https://blog.youkuaiyun.com/2301_81944256/article/details/140270361
版权
小白编程日志专栏收录该内容
3 篇文章
订阅专栏
# This script generates a word cloud from a single Markdown file or from all Markdown files in a folder.
# You can choose the color family (hue range) used to draw the words.

import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import os
import glob


def folder_check(path):
    """Classify *path* on the filesystem.

    Returns:
        "file" if *path* is an existing regular file, "folder" if it is an
        existing directory, otherwise "not found".
    """
    # Probes run in order: the file test first, then the directory test.
    probes = (
        ("file", os.path.isfile),
        ("folder", os.path.isdir),
    )
    for kind, matches in probes:
        if matches(path):
            return kind
    return "not found"

# Path of the Markdown file, or of a folder of *.md files, to read.
default_path = "E:\\备份\\cloudnotes\\projects\\24-summber"
custom_path = input("input path or press enter to use default folder:")
# input() returns the text exactly as typed (no escape processing), so the
# backslashes must not be doubled: doubling is unnecessary and corrupts UNC
# prefixes such as \\server\share.  Only trim surrounding whitespace and the
# double quotes that a pasted "copied as path" value carries.
custom_path = custom_path.strip().strip('"')
if custom_path == "":
    custom_path = default_path

# Classify the path once instead of re-probing the filesystem per branch.
path_kind = folder_check(custom_path)
if path_kind == "folder":
    # glob.escape keeps characters like "[" in the folder name literal;
    # sorting makes the concatenation order deterministic.
    md_paths = sorted(
        glob.glob(os.path.join(glob.escape(custom_path), "*.md"))
    )
elif path_kind == "file":
    md_paths = [custom_path]
else:
    print("not found!")
    # Non-zero status so callers can tell the run failed.
    raise SystemExit(1)

md_texts = []
for file_path in md_paths:
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            md_texts.append(file.read())
    except UnicodeDecodeError:
        # Same handling for a single file and for files inside a folder.
        print("file is not utf-8 encoded!", file_path)
        raise SystemExit(1) from None

# Join with a newline so the last word of one file is not fused with the
# first word of the next.
text_content = "\n".join(md_texts)
if not text_content.strip():
    # Nothing to plot (e.g. a folder without any *.md file).
    print("no text found!")
    raise SystemExit(1)

# Segment the text into words with jieba (cut_all=False, i.e. not "full
# mode") and join the tokens with single spaces into one string.
word_list = jieba.cut(text_content, cut_all=False)
words = str.join(" ", word_list)

# Font file handed to WordCloud; the original note says it is chosen so that
# Chinese text can be rendered.
font_path = "simhei.ttf"

# Chinese stop words to exclude from the word cloud.
# Written as a set literal with each word listed once; the resulting set is
# identical to the previous heavily duplicated list.
stopwords = {
    "的", "了", "在", "是", "和", "有", "我", "一个", "这", "与",
    "我们", "上", "就", "也", "到", "那", "可以", "如果", "他", "你",
    "们", "自己", "对", "很", "会", "当", "将", "她", "及", "以",
    "于", "同", "它", "下", "要", "人", "一", "些", "子", "个",
    "但", "由", "跟", "都", "而",
}

# print(stopwords)

from PIL import Image
import numpy as np

# Build the mask array from an image; the word cloud is meant to match the
# image's dimensions.
# Raw string: "\d" and "\m" are not valid escape sequences, so the non-raw
# form triggers an invalid-escape warning; the path value is unchanged.
mask = np.array(Image.open(r"D:\desktop_temp\miku16th.jpg"))

import random

# Custom color functions for WordCloud: each returns a random HSL color within one hue range

def _random_hsl(hue_low, hue_high, random_state=None):
    """Return a random ``hsl(h, s%, l%)`` string with hue in [hue_low, hue_high].

    Saturation is drawn from 50-100 and lightness from 40-70.

    Args:
        hue_low: Lower bound of the hue, in degrees.
        hue_high: Upper bound of the hue, in degrees.
        random_state: Optional object with a ``uniform`` method (for example a
            ``random.Random``).  When None, the module-level ``random``
            generator is used.
    """
    rng = random if random_state is None else random_state
    # Draw order is hue, saturation, lightness.
    hue = rng.uniform(hue_low, hue_high)
    saturation = rng.uniform(50, 100)
    lightness = rng.uniform(40, 70)
    return "hsl(%f, %f%%, %f%%)" % (hue, saturation, lightness)


# The callbacks below share one signature (word, font_size, position,
# orientation, random_state, **kwargs).  Only random_state is used: when it
# is given, colors are drawn from it, so a seeded generator yields
# reproducible colors.


def red_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random red tone (hue 0-30)."""
    return _random_hsl(0, 30, random_state)


def orange_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random orange tone (hue 30-45)."""
    return _random_hsl(30, 45, random_state)


def yellow_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random yellow tone (hue 45-60)."""
    return _random_hsl(45, 60, random_state)


def green_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random green tone (hue 120-135)."""
    return _random_hsl(120, 135, random_state)


def cyan_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random cyan tone (hue 175-190)."""
    return _random_hsl(175, 190, random_state)


def blue_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random blue tone (hue 190-240)."""
    return _random_hsl(190, 240, random_state)


def purple_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random purple tone (hue 270-300)."""
    return _random_hsl(270, 300, random_state)


def pink_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random pink tone (hue 300-330)."""
    return _random_hsl(300, 330, random_state)


def grey_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random grey: hue 0, saturation 0%, integer lightness 60-100."""
    rng = random if random_state is None else random_state
    # NOTE: lightness values close to 100% are almost white.
    return "hsl(0, 0%%, %d%%)" % rng.randint(60, 100)


def custom_color_func(colorfield):
    """Return the color callback registered for the color name *colorfield*.

    The name is matched case-insensitively and ignoring surrounding
    whitespace, so "Red" and " red " both select ``red_color_func``.

    Args:
        colorfield: One of "red", "orange", "yellow", "green", "cyan",
            "blue", "purple", "pink" or "grey".

    Returns:
        The matching ``*_color_func`` callable, or None when the name is not
        recognized (or is not a string).
    """
    if not isinstance(colorfield, str):
        return None

    # Dispatch table instead of a long if/elif chain.
    color_funcs = {
        "red": red_color_func,
        "orange": orange_color_func,
        "yellow": yellow_color_func,
        "green": green_color_func,
        "cyan": cyan_color_func,
        "blue": blue_color_func,
        "purple": purple_color_func,
        "pink": pink_color_func,
        "grey": grey_color_func,
    }
    return color_funcs.get(colorfield.strip().lower())
        
# Ask which color family the words should be drawn in.
color = input("input color field: ")

# Collect the WordCloud settings first, then build the object.
# NOTE(review): the wordcloud documentation says width/height are ignored
# when a mask is supplied - confirm before relying on the 800x600 size.
cloud_options = dict(
    mask=mask,  # mask array that shapes the cloud
    stopwords=stopwords,  # words to leave out
    font_path=font_path,  # font file used for rendering
    background_color="white",  # background color
    max_words=100,  # maximum number of words drawn
    width=800,
    height=600,  # requested image size
    color_func=custom_color_func(color),  # custom color callback
)
wc = WordCloud(**cloud_options)
wc.generate(words)

# Show the word cloud image without axes.
plt.imshow(wc, interpolation="bilinear")
plt.axis("off")
plt.show()