JSON读取大量数据时报错:JSONDecodeError: Extra data: line 2 column 1

报错代码

修改为以下代码(原因:文件中每一行都是一个独立的 JSON 对象,用 json.load 一次性解析整个文件会在第二行报 Extra data;应逐行读取并对每一行调用 json.loads):

# -*- coding: utf-8 -*-
import json
import codecs
import codecs


class ProcessDgreData:
    """Convert a JSON Lines entity file into character-level BIO records.

    Each non-blank input line is parsed as one JSON object that must provide
    ``originalText`` (the sentence) and ``entities`` (a list of objects with
    ``label_type``, ``start_pos`` and ``end_pos``). One output line is
    appended per input record, holding ``id``, ``text`` (the sentence split
    into single characters) and ``labels`` (one BIO tag per character).
    """

    def __init__(self, train_file="./test.json",
                 output_file="./final_test.json"):
        """Store the input and output paths.

        Args:
            train_file: Path of the JSON Lines file to read.
            output_file: Path of the file the converted records are
                appended to.
        """
        self.train_file = train_file
        self.output_file = output_file

    @staticmethod
    def _bio_labels(sentence, entities):
        """Return one BIO tag per character of *sentence*."""
        labels = ["O"] * len(sentence)
        for entity in entities:
            tag = entity['label_type']
            start = entity['start_pos']
            # end_pos is treated as exclusive: the last tagged index is
            # end_pos - 1.
            end = entity['end_pos']
            labels[start] = "B-" + tag
            for pos in range(start + 1, end):
                labels[pos] = "I-" + tag
        return labels

    def get_ner_data(self):
        """Read ``train_file`` line by line and append BIO records.

        Blank lines are skipped, so a stray empty line no longer aborts the
        run. Records are numbered from 1 in the order they are read and get
        the ids ``BIO1``, ``BIO2``, ... The input is streamed rather than
        loaded at once, and the output file is opened a single time in
        append mode.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
            KeyError: If a record or entity lacks a required key.
            IndexError: If an entity span lies outside the sentence.
        """
        with open(self.train_file, 'r', encoding='utf-8') as src, \
                open(self.output_file, 'a', encoding='utf-8') as dst:
            num = 0
            for line in src:
                if not line.strip():
                    continue
                num += 1
                dic = json.loads(line)
                sentence = dic['originalText']
                label_list = self._bio_labels(sentence, dic['entities'])
                print(len(sentence))
                print(len(label_list))
                print('=========================')
                record = {
                    "id": "BIO" + str(num),
                    "text": list(sentence),
                    "labels": label_list,
                }
                json.dump(record, dst, ensure_ascii=False)
                dst.write('\n')
 
 
 
if __name__ == "__main__":
    # Run the conversion only when this file is executed as a script.
    converter = ProcessDgreData()
    converter.get_ner_data()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值