# Read the input CSV as UTF-8 in large chunks, draw a reproducible random
# sample from each chunk, and write every sample to its own numbered CSV file.
chunksize = 1230000   # rows requested from read_csv per chunk
sample_size = 500000  # rows to sample from each chunk
seq = 1               # 1-based sequence number used in the output file name

# on_bad_lines='skip' silently drops malformed rows instead of raising.
for chunk in pd.read_csv(
    file_path2,
    encoding='utf-8',
    chunksize=chunksize,
    on_bad_lines='skip',
    low_memory=False,
):
    print(chunk.shape)
    # A chunk (typically the last one) may hold fewer rows than sample_size;
    # DataFrame.sample raises ValueError when n exceeds the row count and
    # replace=False, so cap n at the rows actually available.
    sample_df = chunk.sample(n=min(sample_size, len(chunk)), random_state=42)
    output_path = f'/workdir/data/ONLINE_DATA_{seq}.csv'
    sample_df.to_csv(output_path, encoding='utf-8', index=False)
    seq += 1