import jieba
import re
# Sample records that mix Chinese text with ASCII identifiers.
eve_list = [
    "测试中出现Power_Type_check",
    "依据BOM和装配图,BOM中没有不用处理",
    "内存问题反馈攻关组跟踪",
]
# Strip every run of whitespace, digits, ASCII letters and the symbols
# % _ / from each record. The pattern is a raw string so that "\s" and "\d"
# reach the regex engine instead of being read as (invalid) string escapes.
cleaned_list = [re.sub(r"[\s\d%_/a-zA-Z]+", "", text) for text in eve_list]
print(cleaned_list)
# 2. Also remove stop words; applied to every entry of a list.
import jieba
import re
# Sample records that mix Chinese text with ASCII identifiers.
eve_list = [
    "测试中出现Power_Type_check",
    "依据BOM和装配图,BOM中没有不用处理,谢谢",
    "内存问题反馈攻关组跟踪",
]
# Tokens to drop after segmentation.
stopwords = ['谢谢', '请']

# Segment every record into a list of tokens with jieba.
cut_word = [jieba.lcut(every_str) for every_str in eve_list]

# Filter the stop words out of every record's token list.
res = [
    [every_word for every_word in words if every_word not in stopwords]
    for words in cut_word
]

# Glue each record's remaining tokens back together, then strip whitespace,
# digits, ASCII letters and the symbols % _ /. The pattern is a raw string so
# "\s" and "\d" are passed to the regex engine rather than being treated as
# (invalid) string escapes.
result = [re.sub(r"[\s\d%_/a-zA-Z]+", "", "".join(words)) for words in res]
print(result)
# 3. Also remove stop words; applied to a single string.
import jieba
import re
# A single sample record (despite the name, this is a string, not a list).
eve_list = "依据BOM和装配图,BOM中没有不用处理,谢谢"
# Tokens to drop after segmentation.
stopwords = ['谢谢', '请']

# Segment the record into tokens with jieba.
cut_word_list = jieba.lcut(eve_list)

# Keep only the tokens that are not stop words.
res = [every_word for every_word in cut_word_list if every_word not in stopwords]

# The tokens are joined with spaces, but "\s" in the pattern below removes
# those spaces again, along with digits, ASCII letters and the listed
# symbols. The pattern is a raw string so "\s", "\d" and "\-" reach the regex
# engine instead of being read as (invalid) string escapes.
temp = " ".join(res)
res_string = re.sub(r"[\s\d%_/a-zA-Z#::.~\-]+", "", temp)
print(res_string)