def _clean_text(text):
    """Return *text* with every character outside the keep-set removed.

    The keep-set is: CJK ideographs U+4E00..U+9FA5, ASCII digits, ASCII
    letters, the ASCII comma and period, the full-width comma (U+FF0C) and
    the ideographic full stop (U+3002). Whitespace and line breaks are not in
    the keep-set, so they are stripped by the same pass.
    """
    # Imported locally so this block works even if the module has no
    # top-level ``import re``.
    import re

    # The digit range must be exactly 0-9: widening it (e.g. to "0-s") would
    # silently let through ':', ';', '<', '[', '_' and similar punctuation.
    # U+FF0C is kept alongside U+3002 so Chinese punctuation is treated the
    # same way as its ASCII counterparts.
    return re.sub(
        r"[^\u4e00-\u9fa5\u0030-\u0039\u0041-\u005a\u0061-\u007a,.\uff0c\u3002]",
        "",
        text,
    )


def change_text(path, out_path='new.tt'):
    """Filter a UTF-8 text file down to Chinese, digits, letters and commas/periods.

    The file at *path* is read as UTF-8, cleaned with ``_clean_text``, echoed
    to stdout, and written to *out_path*.

    Args:
        path: Path of the UTF-8 encoded input file.
        out_path: Path of the file to (over)write with the cleaned text.
            Defaults to ``'new.tt'`` in the current working directory.

    Returns:
        None. The result is delivered via stdout and *out_path*.
    """
    with open(path, 'r', encoding='utf-8') as f_read:
        raw_text = f_read.read()

    cleaned = _clean_text(raw_text)
    print(cleaned)

    # Write explicitly as UTF-8: the platform default encoding may be unable
    # to represent Chinese characters.
    with open(out_path, 'w', encoding='utf-8') as f_write:
        f_write.write(cleaned)
# Script entry point: clean 'raw.tt' (resolved against the current working
# directory) when this file is executed directly rather than imported.
if __name__ == "__main__":
    path = "raw.tt"
    change_text(path=path)
# Reposted from: https://www.cnblogs.com/rise0111/p/11419619.html