作为编码问题集合:
2)UnicodeDecodeError: 'utf-8' codec can't decode byte 0xbd in position 0: invalid start byte
从网上复制的文本以ANSI编码储存为新文件,使用以下代码读取时报错:
import numpy as np
# Path of the text file that the `with open(...)` statement below reads.
DATA_PATH = r'C:\Users\Administrator.SC-201605202132\AppData\Local\Programs\Python\Python37\forTest\test.txt'
SENTENSE_NUM = 200000 # Number of sentences to read
# Module-level lists, created empty here.
# NOTE(review): presumably filled with samples (X) and labels (y) further down;
# the code that populates them is not visible in this part of the file -- confirm.
X = []
y = []
with open(DATA_PATH,'rt',encoding='utf8') as f:#
for s in f.readlines():#这里报错
s = s.strip()
if not s:
continue
tag_index = []
for i in range(0,len(s)-1):
c = s[i]
if c == '|':
continue
next_c = s[i+1]
if next_c == '|':
tag_index.append(1)
else:
tag_index