import os
import sys
from collections import Counter

import jieba
import jieba.posseg as pseg
# NOTE(review): `join`, `sys` and `pseg` are not referenced anywhere in the
# visible code; the astropy import looks like an accidental auto-import.
# Kept to avoid changing import-time behavior -- confirm before removing.
from astropy.table.np_utils import join


def main():
    """Segment ``cutTest.txt`` with jieba and print per-word frequencies.

    All paths are resolved against the current working directory:

    * ``last.txt``    -- user dictionary loaded into jieba before cutting.
    * ``cutTest.txt`` -- UTF-8 input text, segmented line by line.
    * ``result.txt``  -- UTF-8 output; one line per input line, with the
      tokens separated by single spaces.

    After segmentation, every distinct token is printed together with its
    number of occurrences (in first-seen order), followed by ``end``.

    Raises:
        OSError: if the input file cannot be opened for reading or the
            result file cannot be opened for writing.
    """
    current_dir = os.path.abspath('.')

    # Custom dictionary so that domain-specific terms are kept intact.
    jieba.load_userdict(os.path.join(current_dir, 'last.txt'))

    source_path = os.path.join(current_dir, 'cutTest.txt')
    result_path = os.path.join(current_dir, 'result.txt')

    # Token -> number of occurrences. Counter keeps first-seen order.
    word_counts = Counter()

    # `with` guarantees both handles are closed even if segmentation or a
    # write fails part-way through.
    with open(source_path, encoding="utf-8") as source, \
            open(result_path, 'w', encoding="utf-8") as result:
        for line in source:
            # Drop the line terminator before cutting; otherwise jieba emits
            # it as a token, which doubles the newline in the output file
            # and shows up as a bogus "word" in the statistics.
            tokens = list(jieba.cut(line.rstrip("\n"), cut_all=False))

            result.write(" ".join(tokens))
            result.write("\n")

            # Count the tokens themselves (not a join/split round-trip) and
            # skip whitespace-only tokens so blanks are not tallied as words.
            word_counts.update(token for token in tokens if token.strip())

    for word, count in word_counts.items():
        print(word, count)
    print("end")


if __name__ == '__main__':
    main()