词云分析之英文

from pyecharts import WordCloud
import jieba
import re
import nltk


# Build an English word-frequency table from a text file and render it as a
# pyecharts word cloud written to test.html.
from collections import Counter

from nltk.tokenize import WordPunctTokenizer

# Created once up front; both are loop-invariant.
tokenizer = WordPunctTokenizer()
ascii_letter = re.compile('[a-zA-Z]')

word_list = []
with open(r'F:\算法\others\merry.txt', 'r', encoding='utf-8') as f:
    # Iterate the file lazily instead of loading every line into memory.
    for line in f:
        line = line.strip()
        # Skip lines that contain no English letters at all.
        if not ascii_letter.search(line):
            continue
        # Drop every character with a code point >= 256 (e.g. CJK text)
        # before tokenizing.
        result = "".join(ch for ch in line if ord(ch) < 256)
        for w in tokenizer.tokenize(result):
            # Keep only tokens whose first character is an ASCII letter;
            # this discards punctuation and number tokens.
            if 'a' <= w.lower()[0] <= 'z':
                word_list.append(w)

# Single-pass frequency count. Counting is case-sensitive ("The" and "the"
# are separate entries), and keys follow first-occurrence order.
word_dic = Counter(word_list)

# Parallel lists expected by WordCloud.add: words and their weights.
name = list(word_dic)
# Weights are the raw counts scaled by 100.
value = [count * 100 for count in word_dic.values()]

wordcloud = WordCloud(width=1300, height=620)
wordcloud.add("", name, value, word_size_range=[20, 100], shape='pentagon')
wordcloud.render('test.html')

  

转载于:https://www.cnblogs.com/pandaboy1123/p/10175229.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值