1.分词
分词之后保存好分词结果，避免每次训练模型时都要重新分词而浪费时间。
import jieba
import codecs
# Segment the input file line by line with jieba and save the result, so the
# segmentation does not have to be repeated on later runs.
# Each output line holds the tokens of the matching input line joined by
# single spaces.
# The `with` statement closes both files even if reading, segmentation or
# writing raises, which the manual close() calls did not guarantee.
with open('C:/Users/DELL/Desktop/demo.txt', 'r', encoding='utf-8') as f1, \
        open('C:/Users/DELL/Desktop/demo_cut.txt', 'w', encoding='utf-8') as f2:
    for line in f1:
        # line.strip() removes leading/trailing whitespace, including the
        # trailing newline; cut_all=False asks jieba for its precise
        # (non-full) segmentation mode.
        seg = jieba.cut(line.strip(), cut_all=False)
        s = ' '.join(seg)
        f2.write(s + '\n')