一、将数据保存到文件
1.pipelines.py文件
import json


class TencentPipeline(object):
    """Scrapy item pipeline that dumps ``hr_tencent`` items to ``data.json``.

    Every item scraped by the spider named ``hr_tencent`` is serialized as one
    JSON object and written on its own line, terminated by a comma. Items
    coming from any other spider are passed through untouched.
    """

    def open_spider(self, spider):
        """Open the output file when the ``hr_tencent`` spider starts.

        Args:
            spider: The spider being opened; only its ``name`` is inspected.
        """
        if spider.name == 'hr_tencent':
            # Explicit encoding so the file never depends on the platform's
            # default locale encoding.
            self.file = open('data.json', 'w', encoding='utf-8')

    def process_item(self, item, spider):
        """Write one item to the output file and hand it on unchanged.

        Args:
            item: The scraped item; anything accepted by ``dict()``.
            spider: The spider that produced the item.

        Returns:
            The same ``item`` object, so later pipelines still receive it.
        """
        if spider.name == 'hr_tencent':
            # json.dumps keeps its default ensure_ascii=True, so non-ASCII
            # text is stored as \uXXXX escapes. Each record ends with ",\n",
            # which means the file as a whole is a comma-terminated list of
            # objects rather than a single JSON document.
            self.file.write(json.dumps(dict(item)) + ',\n')
        return item

    def close_spider(self, spider):
        """Close the output file when the ``hr_tencent`` spider finishes.

        Args:
            spider: The spider being closed; only its ``name`` is inspected.
        """
        if spider.name == 'hr_tencent':
            self.file.close()
2.settings.py文件
# Enable the JSON-file pipeline. The integer is the pipeline's priority:
# Scrapy runs enabled pipelines in ascending order of this value
# (conventionally chosen in the 0-1000 range).
ITEM_PIPELINES = {
    'tencent.pipelines.TencentPipeline': 300,
}
二、将数据保存到mongodb
1.pipelines.py文件
from pymongo import MongoClient


class Tencent1Pipeline(object):
    """Scrapy item pipeline that stores ``hr_tencent1`` items in MongoDB.

    Items scraped by the spider named ``hr_tencent1`` are inserted into the
    ``tencent`` collection of the ``tencent`` database on a MongoDB server at
    ``127.0.0.1:27017``. Items from any other spider pass through untouched.
    """

    def open_spider(self, spider):
        """Connect to MongoDB when the ``hr_tencent1`` spider starts.

        Args:
            spider: The spider being opened; only its ``name`` is inspected.
        """
        if spider.name == 'hr_tencent1':
            self.client = MongoClient('127.0.0.1', 27017)
            # database "tencent" -> collection "tencent"
            self.tencent = self.client['tencent']['tencent']

    def process_item(self, item, spider):
        """Insert one item into the collection and hand it on unchanged.

        Args:
            item: The scraped item; anything accepted by ``dict()``.
            spider: The spider that produced the item.

        Returns:
            The same ``item`` object, so later pipelines still receive it.
        """
        if spider.name == 'hr_tencent1':
            print(item)
            # Collection.insert() was deprecated in PyMongo 3.0 and removed in
            # 4.0; insert_one() is the supported single-document call. A copy
            # is inserted because PyMongo adds an "_id" key to the document.
            self.tencent.insert_one(dict(item))
        return item

    def close_spider(self, spider):
        """Close the MongoDB connection when the ``hr_tencent1`` spider finishes.

        Args:
            spider: The spider being closed; only its ``name`` is inspected.
        """
        if spider.name == 'hr_tencent1':
            self.client.close()
2.settings.py文件
# Enable the MongoDB pipeline. The integer is the pipeline's priority:
# Scrapy runs enabled pipelines in ascending order of this value
# (conventionally chosen in the 0-1000 range).
ITEM_PIPELINES = {
    'tencent.pipelines.Tencent1Pipeline': 299,
}