# Rewrite a text file so that every word sits on its own line, with common
# punctuation removed and no blank lines in the result.

path = r'h:\kill a bird.txt'
path_over = r'h:\kill a bird new.txt'

# Characters deleted from every word.  The earlier version passed regex-like
# patterns ('\n{2,}', '\s+', '\xa1+') to str.replace, which only matches
# literal substrings, so those calls never removed anything.  Whitespace is
# now handled by str.split() and '\xa1' is deleted here as a plain character.
_STRIP_TABLE = str.maketrans('', '', ",.()!`';-\xa1")


def extract_words(text_lines):
    """Return the cleaned words found in *text_lines*, in reading order.

    Each line is split on runs of whitespace (which also discards the
    trailing newline), the characters in ``_STRIP_TABLE`` are deleted from
    every token, and tokens that end up empty (e.g. a lone ``-``) are
    dropped so they cannot turn into blank output lines.

    Args:
        text_lines: Any iterable of strings, e.g. an open text file.

    Returns:
        A list of non-empty words without trailing newlines.
    """
    words = []
    for line in text_lines:
        for token in line.split():
            word = token.translate(_STRIP_TABLE)
            if word:
                words.append(word)
    return words


def main():
    """Read ``path``, extract its words and write them to ``path_over``."""
    # NOTE: both files use the platform default encoding, as before.
    with open(path, 'r') as f:
        words = extract_words(f)
    with open(path_over, 'w') as f_over:
        f_over.writelines(word + '\n' for word in words)


if __name__ == '__main__':
    main()
# Process the whole text into a one-word-per-line form.
# (Latest recommended article published on 2022-01-22 21:23:40.)