Generating a word cloud in Python (custom shape, custom text color)
Using Dream of the Red Chamber (《红楼梦》) as the example text:
# Import the jieba, wordcloud, PIL, numpy and matplotlib libraries
import jieba
from wordcloud import WordCloud, ImageColorGenerator
import matplotlib.pyplot as plt
from matplotlib import colors
from PIL import Image
import numpy as np
# Input file paths
path_text='C:\\Users\\86181\\.spyder-py3\\红楼梦.txt'
path_img='C:\\Users\\86181\\Desktop\\5.jpg'
# Load the shape image used as the word-cloud mask
background_image=np.array(Image.open(path_img))
mask=background_image
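# Note: wordcloud treats pure-white areas of the mask as background and leaves them blank; the words fill the non-white region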
# Color list for the words (change the hex codes to change the colors)
color_list=['#FF7F00','#FF7F00']
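# (both entries above are the same orange, so all words come out in one color; use different hex codes for a multi-color cloud)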
# Build a matplotlib colormap from the color list
colormap=colors.ListedColormap(color_list)
# Words to exclude from the statistics and the word cloud
excludes={"什么","一个","我们","那里","你们","如今",
          "说道","知道","老太太","起来","姑娘","这里",
          "出来","他们","众人","自己","一面","太太",
          "只见","怎么","奶奶","两个","没有","不是",
          "不知","这个","听见","咱们","就是","进来","东西"}
f=open(path_text,"r",encoding='utf-8')
txt=f.read()
f.close()
# jieba precise-mode segmentation: splits the text into Chinese words and returns them as a list
words=jieba.lcut(txt)
# Count word frequencies, skipping single-character tokens; dict.get returns the existing count if the key is present, otherwise 0
counts={}
for word in words:
    if len(word)==1:
        continue
    else:
        counts[word]=counts.get(word,0)+1
# Remove the excluded words from the counts
for word in excludes:
    counts.pop(word,None)
# items() returns all (word, count) pairs
items=list(counts.items())
# Sort by frequency in descending order
items.sort(key=lambda x:x[1],reverse=True)
for i in range(8):
    word,count=items[i]
    print("{0:<5}{1:>5}".format(word,count))
# Generate the word cloud; join the segmented words with spaces so WordCloud can split them back into tokens
newtxt=' '.join(words)
wordcloud=WordCloud(font_path=r'C:\Windows\Fonts\msyh.ttc',
                    background_color="white",
                    stopwords=excludes,
                    colormap=colormap,
                    mask=mask,
                    ).generate(newtxt)
# Display and save the word-cloud image (save before show, otherwise the saved file is blank)
fig = plt.figure()
plt.imshow(wordcloud)
plt.axis('off')
plt.savefig('D:\\picture.jpg',dpi=500)
plt.show()
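The title also promises variable text color, and ImageColorGenerator is imported above but never used. Below is a minimal optional sketch, reusing the background_image array and the wordcloud object defined above (the output path is only an example), that recolors the words from the mask image so the text color follows the picture:
# Optional: take the word colors from the mask image instead of the fixed colormap
image_colors=ImageColorGenerator(background_image)
wordcloud.recolor(color_func=image_colors)
plt.figure()
plt.imshow(wordcloud)
plt.axis('off')
plt.savefig('D:\\picture_recolored.jpg',dpi=500)  # example output path
plt.show()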
Result: