#!/usr/bin/env python3
def savefile(path: str, text: str) -> None:
    """Write *text* to *path* as UTF-8 encoded bytes.

    The file is opened in binary mode and truncated if it already exists.

    Args:
        path: Destination file path.
        text: String content to encode and write.
    """
    # The context manager guarantees the handle is closed even when
    # encoding or writing raises.
    with open(path, "wb") as out:
        out.write(text.encode("utf-8"))
def readfile(path: str) -> bytes:
    """Return the entire contents of *path* as raw bytes.

    The file is opened in binary mode, so no decoding is performed here;
    callers are responsible for decoding with the appropriate charset.

    Args:
        path: Path of the file to read.

    Returns:
        The undecoded file contents.
    """
    # The context manager guarantees the handle is closed even when
    # reading raises.
    with open(path, "rb") as src:
        return src.read()
def seg(data_path,save_path):
list = os.listdir(data_path)
for dir in list:
if dir == ‘.DS_Store’:
print(“out:”+dir)
continue
seg_dir = save_path + dir + ‘/’
if not os.path.exists(seg_dir):
os.makedirs(seg_dir)
files = os.listdir(data_path + dir +’/’)
for file in files:
if file == ‘.DS_Store’:
print(‘in:’+file)
continue
text = readfile(data_path +dir+’/’+file)
print(’--------------------’+(data_path +dir+’/’+file))
text = text.decode(‘gb2312’).replace(’\r\n’,’’).strip()
print(“1:—”+text)
text_cut = jieba.cut(