"""Crawl the dealer directory exposed by www.ftms.com.cn and store each dealer.

The crawl walks province -> city -> dealer through three JSON endpoints,
turns every dealer into a flat record and passes the records to
``PgsqlPipeline``.
"""
import json
from datetime import date

import requests

from dbutil.pgsql import PgsqlPipeline


# Headers sent with every request.
# NOTE(review): 'br' is advertised in Accept-Encoding; requests can only decode
# Brotli responses when a brotli package is installed - confirm this holds in
# the deployment environment.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9',
}

# Seconds to wait for the server before giving up. Without a timeout a single
# stalled connection would block the whole crawl forever.
REQUEST_TIMEOUT = 30


def _read_data(response):
    """Return the ``data`` member of a JSON API response.

    Raises:
        requests.HTTPError: if the server answered with a 4xx/5xx status.
        KeyError: if the decoded body has no ``data`` member.
    """
    # Fail with a clear HTTP error instead of an opaque JSON/KeyError later on.
    response.raise_for_status()
    return response.json()['data']


def get_Provinces():
    """Fetch the province list from the ``getProvince`` endpoint.

    Returns:
        The ``data`` member of the response (iterated by the caller as a
        sequence of dicts with ``cid`` and ``name``), or an empty list when
        the server sends a null/empty payload.
    """
    url = 'https://www.ftms.com.cn/website/Maintenance/getProvince'
    # NOTE(review): verify=False disables TLS certificate verification; kept
    # because the original relies on it - confirm it is still required.
    response = requests.get(
        url, headers=headers, verify=False, timeout=REQUEST_TIMEOUT
    )
    return _read_data(response) or []


def get_Citys(provinces_cid):
    """Fetch the cities of one province from the ``getCity`` endpoint.

    Args:
        provinces_cid: Province identifier, interpolated into the ``cid``
            query parameter.

    Returns:
        The ``data`` member of the response (iterated by the caller as a
        sequence of dicts with ``cid`` and ``name``), or an empty list when
        the server sends a null/empty payload.
    """
    city_url = f'https://www.ftms.com.cn/website/Maintenance/getCity?cid={provinces_cid}'
    res = requests.get(
        city_url, verify=False, headers=headers, timeout=REQUEST_TIMEOUT
    )
    return _read_data(res) or []


def get_Dealers(city_id, provinces_cid):
    """Fetch the dealers of one city from the ``getDealer`` endpoint.

    Args:
        city_id: City identifier, sent as ``cityid``.
        provinces_cid: Province identifier, sent as ``provinceid``.

    Returns:
        The ``data.list`` member of the response, or an empty list when
        ``data`` or ``list`` is null/empty.
    """
    dealers_url = 'https://www.ftms.com.cn/website/Dealer/getDealer'
    data = {
        'cityName': "",
        'cityid': city_id,
        'dealerName': "",
        'provinceName': "",
        'provinceid': provinces_cid,
    }
    print(data)

    # The body is sent as a raw JSON string (no explicit Content-Type header),
    # exactly as the original request did.
    dealer_res = requests.post(
        dealers_url,
        headers=headers,
        verify=False,
        data=json.dumps(data),
        timeout=REQUEST_TIMEOUT,
    )
    payload = _read_data(dealer_res)
    if not payload:
        # Null or empty ``data``: treat as "no dealers" rather than crashing
        # on the subscript below.
        return []
    return payload['list'] or []


def get_item(dealer, province_name, city_name):
    """Build the flat storage record for one dealer.

    Args:
        dealer: Dealer dict as returned by the API; must contain
            ``fullname``, ``address``, ``phone_seal``, ``phone_service``,
            ``lng`` and ``lat``.
        province_name: Name of the province the dealer was listed under.
        city_name: Name of the city the dealer was listed under.

    Returns:
        A dict with the dealer fields filled in, ``update_time`` set to
        today's date, and every field this crawler cannot supply set to
        ``None``.

    Raises:
        KeyError: if ``dealer`` lacks one of the required keys.
    """
    item = {
        "dealer_name": dealer["fullname"],
        "brand_id": None,
        "address": dealer["address"],
        "brand": "丰田",  # "Toyota"
        "province": province_name,
        "city": city_name,
        # "phone_seal" is the key spelling used by the API response.
        "sale_call": dealer["phone_seal"],
        "customer_service_call": dealer["phone_service"],
        "update_time": date.today(),
        "longitude": dealer['lng'],
        "latitude": dealer['lat'],
        "dealer_type": None,
        "manufacturer_id": None,
        "manufacturer": "丰田汽车",  # "Toyota Motor"
        "state": None,
        "opening_date": None,
        "close_date": None,
        "dealer_id_web": None,
        "controlling_shareholder": None,
        "other_shareholders": None,
        "status": None,
        "remarks": None,
    }
    return item


def get_items():
    """Crawl every province and city and collect one record per dealer.

    Returns:
        A list of record dicts built by ``get_item``, in crawl order.
    """
    items = []
    for provinces_dict in get_Provinces():
        provinces_cid = provinces_dict.get('cid')
        provinces_name = provinces_dict.get('name')

        for city in get_Citys(provinces_cid):
            city_id = city.get('cid')
            city_name = city.get('name')

            for dealer in get_Dealers(city_id, provinces_cid):
                print(dealer)
                items.append(get_item(dealer, provinces_name, city_name))

    return items


def main():
    """Run the crawl, then hand every record to a ``PgsqlPipeline``."""
    print('爬虫开始.....')  # "crawler starting"
    items = get_items()

    print('等待数据存储')  # "waiting for data storage"
    pg = PgsqlPipeline()
    try:
        for item in items:
            pg.process_item(item)
    finally:
        # Always release the pipeline, even if storing an item fails.
        pg.close()
    print('数据存储完成!!!')  # "data storage finished"


if __name__ == '__main__':
    main()