import json
import pandas as pd
def print_csv(src_csv, tgt_csv, chunk_size=500000):
    """Convert a CSV file into a JSON Lines file of ``id``/``text`` records.

    The source is read chunk by chunk so the whole file never has to be held
    in memory at once. For every row, the ``classification`` column becomes
    ``id`` and the ``abs`` column becomes ``text``. Rows in which either
    value is missing (NaN) are skipped. Before each chunk is processed, a
    progress line with the number of rows handled so far is printed.

    Args:
        src_csv: Path of the CSV file to read. It must contain the columns
            ``classification`` and ``abs``.
        tgt_csv: Path of the output file. It is overwritten and written as
            UTF-8 with one JSON object per line.
        chunk_size: Number of CSV rows loaded per chunk.

    Raises:
        KeyError: If one of the required columns is absent from the CSV.
    """
    # Open the source first so that a bad source path fails before the
    # target file is truncated.
    reader = pd.read_csv(src_csv, chunksize=chunk_size)
    rows_seen = 0
    try:
        with open(tgt_csv, 'w', encoding='utf-8') as out:
            for chunk in reader:
                print("第%d行" % rows_seen)
                # tolist() yields native Python scalars, which json.dumps can
                # serialize (numpy integer scalars would raise TypeError).
                ids = chunk['classification'].tolist()
                texts = chunk['abs'].tolist()
                for record_id, text in zip(ids, texts):
                    rows_seen += 1
                    if pd.isna(record_id) or pd.isna(text):
                        continue
                    record = {'id': record_id, 'text': text}
                    out.write(json.dumps(record, ensure_ascii=False) + '\n')
    finally:
        # Release the source file handle even if processing fails midway.
        reader.close()
    print("Iteration is stopped")
if __name__ == '__main__':
    # Script entry point: convert the patent CSV in the working directory
    # into a JSON Lines file next to it.
    src_csv, tgt_csv = 'patent.csv', 'new_patent.json'
    print_csv(src_csv, tgt_csv)