# Generate a word cloud from a single Markdown file or from all Markdown
# files in a folder.
# The color family (hue range) of the word cloud is chosen at run time.
import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import os
import glob
def folder_check(path: str) -> str:
    """Classify *path* as an existing file, an existing folder, or neither.

    Args:
        path: Filesystem path to inspect.

    Returns:
        ``"file"`` if the path is a regular file, ``"folder"`` if it is a
        directory, otherwise ``"not found"``.
    """
    if os.path.isfile(path):
        return "file"
    if os.path.isdir(path):
        return "folder"
    return "not found"
# Folder used when the user just presses Enter at the prompt.
default_path = "E:\\备份\\cloudnotes\\projects\\24-summber"
# input() returns the typed characters literally (no escape processing), so
# the path must be used as-is; doubling its backslashes would corrupt it.
custom_path = input("input path or press enter to use default folder:").strip()
if not custom_path:
    custom_path = default_path

# Classify the path once instead of hitting the filesystem in every branch.
path_kind = folder_check(custom_path)
if path_kind == "not found":
    print("not found!")
    raise SystemExit(1)

text_content = ""
try:
    if path_kind == "folder":
        # Read every *.md file directly inside the folder (non-recursive).
        # glob.escape() keeps characters such as "[" in the folder name from
        # being treated as wildcards; sorted() makes the order deterministic.
        pattern = os.path.join(glob.escape(custom_path), "*.md")
        parts = []
        for file_path in sorted(glob.glob(pattern)):
            with open(file_path, "r", encoding="utf-8") as file:
                parts.append(file.read())
        # A newline between files stops the last word of one file from
        # fusing with the first word of the next.
        text_content = "\n".join(parts)
    else:
        with open(custom_path, "r", encoding="utf-8") as file:
            text_content = file.read()
except UnicodeDecodeError:
    # Applies to both the single-file and the folder case.
    print("file is not utf-8 encoded!")
    raise SystemExit(1)
# Segment the Chinese text with jieba (cut_all disabled).
word_list = jieba.cut(text_content, cut_all=False)
# Re-join the tokens into one space-separated string for the word cloud.
words = str.join(" ", word_list)
# Font file handed to WordCloud below (a Chinese font).
font_path = "simhei.ttf"
# Chinese stop words. Each word is listed exactly once; the previous literal
# repeated the same entries many times, which a set discards anyway.
stopwords = {
    "的", "了", "在", "是", "和", "有", "我", "一个", "这", "与",
    "我们", "上", "就", "也", "到", "那", "可以", "如果", "他", "你",
    "们", "自己", "对", "很", "会", "当", "将", "她", "及", "以",
    "于", "同", "它", "下", "要", "人", "一", "些", "子", "个",
    "但", "由", "跟", "都", "而",
}
from PIL import Image
import numpy as np
# Build the mask array from an image file.
# The path is a raw string: "\d" and "\m" are not valid escape sequences, so
# the plain literal only produced the right value by accident and triggers an
# invalid-escape warning on current Python versions.
# The context manager closes the image file once its pixels are copied.
with Image.open(r"D:\desktop_temp\miku16th.jpg") as mask_image:
    mask = np.array(mask_image)
# The mask is what makes the word cloud match the picture's dimensions.
import random
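# Color functions: each returns a random HSL color inside one hue range
# (they use the random module, not a matplotlib colormap).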
def red_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random red-family color as an ``hsl(...)`` string.

    Only ``random_state`` is used; the remaining arguments are accepted so the
    function can be passed as a word-cloud ``color_func`` and are ignored.

    Args:
        random_state: Optional generator exposing ``uniform(a, b)`` (for
            example ``random.Random``). When given, it drives the draw so
            seeded runs are reproducible; otherwise the module-level
            ``random`` generator is used.

    Returns:
        A string ``"hsl(H, S%, L%)"`` with hue in [0, 30], saturation in
        [50, 100] and lightness in [40, 70].
    """
    rng = random if random_state is None else random_state
    hue = rng.uniform(0, 30)  # red hues
    saturation = rng.uniform(50, 100)
    lightness = rng.uniform(40, 70)
    return "hsl(%f, %f%%, %f%%)" % (hue, saturation, lightness)
def orange_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random orange-family color as an ``hsl(...)`` string.

    Only ``random_state`` is used; the remaining arguments are accepted so the
    function can be passed as a word-cloud ``color_func`` and are ignored.

    Args:
        random_state: Optional generator exposing ``uniform(a, b)`` (for
            example ``random.Random``). When given, it drives the draw so
            seeded runs are reproducible; otherwise the module-level
            ``random`` generator is used.

    Returns:
        A string ``"hsl(H, S%, L%)"`` with hue in [30, 45], saturation in
        [50, 100] and lightness in [40, 70].
    """
    rng = random if random_state is None else random_state
    hue = rng.uniform(30, 45)  # orange hues
    saturation = rng.uniform(50, 100)
    lightness = rng.uniform(40, 70)
    return "hsl(%f, %f%%, %f%%)" % (hue, saturation, lightness)
def yellow_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random yellow-family color as an ``hsl(...)`` string.

    Only ``random_state`` is used; the remaining arguments are accepted so the
    function can be passed as a word-cloud ``color_func`` and are ignored.

    Args:
        random_state: Optional generator exposing ``uniform(a, b)`` (for
            example ``random.Random``). When given, it drives the draw so
            seeded runs are reproducible; otherwise the module-level
            ``random`` generator is used.

    Returns:
        A string ``"hsl(H, S%, L%)"`` with hue in [45, 60], saturation in
        [50, 100] and lightness in [40, 70].
    """
    rng = random if random_state is None else random_state
    hue = rng.uniform(45, 60)  # yellow hues
    saturation = rng.uniform(50, 100)
    lightness = rng.uniform(40, 70)
    return "hsl(%f, %f%%, %f%%)" % (hue, saturation, lightness)
def green_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random green-family color as an ``hsl(...)`` string.

    Only ``random_state`` is used; the remaining arguments are accepted so the
    function can be passed as a word-cloud ``color_func`` and are ignored.

    Args:
        random_state: Optional generator exposing ``uniform(a, b)`` (for
            example ``random.Random``). When given, it drives the draw so
            seeded runs are reproducible; otherwise the module-level
            ``random`` generator is used.

    Returns:
        A string ``"hsl(H, S%, L%)"`` with hue in [120, 135], saturation in
        [50, 100] and lightness in [40, 70].
    """
    rng = random if random_state is None else random_state
    hue = rng.uniform(120, 135)  # green hues
    saturation = rng.uniform(50, 100)
    lightness = rng.uniform(40, 70)
    return "hsl(%f, %f%%, %f%%)" % (hue, saturation, lightness)
def cyan_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random cyan-family color as an ``hsl(...)`` string.

    Only ``random_state`` is used; the remaining arguments are accepted so the
    function can be passed as a word-cloud ``color_func`` and are ignored.

    Args:
        random_state: Optional generator exposing ``uniform(a, b)`` (for
            example ``random.Random``). When given, it drives the draw so
            seeded runs are reproducible; otherwise the module-level
            ``random`` generator is used.

    Returns:
        A string ``"hsl(H, S%, L%)"`` with hue in [175, 190], saturation in
        [50, 100] and lightness in [40, 70].
    """
    rng = random if random_state is None else random_state
    hue = rng.uniform(175, 190)  # cyan hues
    saturation = rng.uniform(50, 100)
    lightness = rng.uniform(40, 70)
    return "hsl(%f, %f%%, %f%%)" % (hue, saturation, lightness)
def blue_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random blue-family color as an ``hsl(...)`` string.

    Only ``random_state`` is used; the remaining arguments are accepted so the
    function can be passed as a word-cloud ``color_func`` and are ignored.

    Args:
        random_state: Optional generator exposing ``uniform(a, b)`` (for
            example ``random.Random``). When given, it drives the draw so
            seeded runs are reproducible; otherwise the module-level
            ``random`` generator is used.

    Returns:
        A string ``"hsl(H, S%, L%)"`` with hue in [190, 240], saturation in
        [50, 100] and lightness in [40, 70].
    """
    rng = random if random_state is None else random_state
    hue = rng.uniform(190, 240)  # blue hues
    saturation = rng.uniform(50, 100)
    lightness = rng.uniform(40, 70)
    return "hsl(%f, %f%%, %f%%)" % (hue, saturation, lightness)
def purple_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random purple-family color as an ``hsl(...)`` string.

    Only ``random_state`` is used; the remaining arguments are accepted so the
    function can be passed as a word-cloud ``color_func`` and are ignored.

    Args:
        random_state: Optional generator exposing ``uniform(a, b)`` (for
            example ``random.Random``). When given, it drives the draw so
            seeded runs are reproducible; otherwise the module-level
            ``random`` generator is used.

    Returns:
        A string ``"hsl(H, S%, L%)"`` with hue in [270, 300], saturation in
        [50, 100] and lightness in [40, 70].
    """
    rng = random if random_state is None else random_state
    hue = rng.uniform(270, 300)  # purple hues
    saturation = rng.uniform(50, 100)
    lightness = rng.uniform(40, 70)
    return "hsl(%f, %f%%, %f%%)" % (hue, saturation, lightness)
def pink_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random pink-family color as an ``hsl(...)`` string.

    Only ``random_state`` is used; the remaining arguments are accepted so the
    function can be passed as a word-cloud ``color_func`` and are ignored.

    Args:
        random_state: Optional generator exposing ``uniform(a, b)`` (for
            example ``random.Random``). When given, it drives the draw so
            seeded runs are reproducible; otherwise the module-level
            ``random`` generator is used.

    Returns:
        A string ``"hsl(H, S%, L%)"`` with hue in [300, 330], saturation in
        [50, 100] and lightness in [40, 70].
    """
    rng = random if random_state is None else random_state
    hue = rng.uniform(300, 330)  # pink hues
    saturation = rng.uniform(50, 100)
    lightness = rng.uniform(40, 70)
    return "hsl(%f, %f%%, %f%%)" % (hue, saturation, lightness)
def grey_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random light grey as an ``hsl(...)`` string.

    Only ``random_state`` is used; the remaining arguments are accepted so the
    function can be passed as a word-cloud ``color_func`` and are ignored.

    Args:
        random_state: Optional generator exposing ``randint(a, b)`` (for
            example ``random.Random``). When given, it drives the draw so
            seeded runs are reproducible; otherwise the module-level
            ``random`` generator is used.

    Returns:
        A string ``"hsl(0, 0%, L%)"`` with integer lightness in [60, 100].
    """
    rng = random if random_state is None else random_state
    # NOTE(review): lightness can reach 100% (pure white), which would be
    # invisible on a white background - confirm the intended range.
    return "hsl(0, 0%%, %d%%)" % rng.randint(60, 100)
def custom_color_func(colorfield):
    """Look up the color function for a color-family name.

    Args:
        colorfield: One of ``"red"``, ``"orange"``, ``"yellow"``, ``"green"``,
            ``"cyan"``, ``"blue"``, ``"purple"``, ``"pink"`` or ``"grey"``.
            Surrounding whitespace and letter case are ignored.

    Returns:
        The matching ``*_color_func`` callable, or ``None`` when the name is
        not recognised (or is not a string).
    """
    if not isinstance(colorfield, str):
        return None
    color_funcs = {
        "red": red_color_func,
        "orange": orange_color_func,
        "yellow": yellow_color_func,
        "green": green_color_func,
        "cyan": cyan_color_func,
        "blue": blue_color_func,
        "purple": purple_color_func,
        "pink": pink_color_func,
        "grey": grey_color_func,
    }
    # Normalise typed input so "Red" or " red " still selects the red family.
    return color_funcs.get(colorfield.strip().lower())
# Ask for the color family and resolve it to a color function up front.
color = input("input color field: ")
color_func = custom_color_func(color)

# Configure the word cloud; a Chinese font is supplied through font_path.
# NOTE(review): the wordcloud docs say width/height are ignored when a mask
# is given - confirm, and drop them if so.
wc = WordCloud(
    font_path=font_path,       # font file
    mask=mask,                 # mask array built from the image
    stopwords=stopwords,       # words to leave out
    background_color="white",  # canvas color
    max_words=100,             # upper bound on words drawn
    width=800,
    height=600,                # requested image size
    color_func=color_func,     # custom color function (may be None)
)
wc = wc.generate(words)

# Show the rendered word cloud without axes.
plt.imshow(wc, interpolation="bilinear")
plt.axis("off")
plt.show()
# Title: simple python wordcloud from markdown
# (Source-page footer: latest recommended article published 2025-06-12 10:41:46)