# @file :百度爱采购ajax.py
# @Software : PyCharm
import condition as condition
import requests
import re
import csv
from time import sleep
import os
# Search keyword. Replace the literal with an input(...) call to prompt at run time.
kw = "江西"
file_name = '百度爱采购-%s.csv' % kw

SEARCH_URL = 'https://b2b.baidu.com/s/a'
SHOP_URL = 'https://b2b.baidu.com/shop'
HEADERS = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'}
# Seconds to wait for any single HTTP response; without it a stalled
# connection would block the whole run forever.
REQUEST_TIMEOUT = 10
# Placeholder written when a text field is absent from the contact page.
MISSING = "无"
# Fields written after the contact name and address, in column order.
# NOTE(review): 'telNumber' appears twice, which looks like a copy/paste slip
# in the original script. The layout is kept as-is so rows appended to an
# existing CSV stay column-aligned; confirm before dropping the duplicate.
LIST_FIELDS = (
    'phoneNumber',   # mobile
    'telNumber',     # landline
    'telNumber',
    'faxNumber',     # fax
    'qqNumber',      # QQ
    'wechatNumber',  # WeChat
    'email',         # e-mail
)


def decode_escaped(text):
    r"""Return *text* with ``\uXXXX``-style escapes turned into characters.

    Characters that are already non-ASCII are preserved: they are first
    rewritten as backslash escapes (``backslashreplace``) so that the
    ``unicode_escape`` codec, which reads its input as Latin-1, cannot
    garble them. If the text is not a valid escape sequence (for example it
    ends in a lone backslash) it is returned unchanged.
    """
    try:
        return text.encode('latin-1', 'backslashreplace').decode('unicode_escape')
    except UnicodeDecodeError:
        return text


def extract_xzhid(product):
    """Return the shop id for one entry of the search result list.

    The id is taken from the ``xzhid=<digits>`` query parameter of the
    entry's ``jumpUrl`` when present, otherwise from its ``xzhid`` key.
    Returns ``None`` when neither is available.
    """
    # 'jumpUrl' may be missing or None; re.search needs a string.
    match = re.search(r'xzhid=(\d+)', product.get('jumpUrl') or '')
    if match:
        return match.group(1)
    return product.get('xzhid')


def first_decoded(field, page):
    """Return the first ``"field":"value"`` value in *page*, decoded, or MISSING."""
    found = re.findall('"%s":"(.*?)"' % field, page, re.S)
    return decode_escaped(found[0]) if found else MISSING


def build_row(corporate_name, page):
    """Build one CSV row from a company name and its contact-page text.

    The row is: company name, contact name, address, then one cell per entry
    of LIST_FIELDS. Contact name and address are single decoded strings (or
    MISSING); the remaining cells are the raw lists of every match found,
    exactly as the script has always written them.
    """
    row = [
        corporate_name,
        first_decoded('contactName', page),
        first_decoded('externalAddress', page),
    ]
    row.extend(re.findall('"%s":"(.*?)"' % field, page, re.S) for field in LIST_FIELDS)
    return row


def fetch_products(page_no):
    """Fetch search-result page *page_no* for ``kw`` and return its product list.

    Returns an empty list when the response carries no ``data.productList``,
    which the caller treats as the end of the results.
    """
    params = {
        'ajax': '1',
        'q': kw,
        'p': page_no,
        'sa': '',
        'mk': '全部结果',
        's': '30',
        'adn': '0',
        'resType': 'product',
        'from': 'search',
    }
    payload = requests.get(
        SEARCH_URL, params=params, headers=HEADERS, timeout=REQUEST_TIMEOUT
    ).json()
    data = payload.get('data') or {}
    return data.get('productList') or []


def fetch_contact_page(corporate_name, xzhid):
    """Download the text of a shop's contact page.

    The query is passed through ``params`` so that company names containing
    ``&``, ``#`` or spaces are percent-encoded instead of corrupting the URL.
    Returns an empty string if the request fails, so one bad shop does not
    abort the whole run.
    """
    params = {
        'name': corporate_name,
        'xzhid': xzhid,
        'tpath': 'contact',
        'fid': '',
        'pi': '',
    }
    try:
        return requests.get(
            SHOP_URL, params=params, headers=HEADERS, timeout=REQUEST_TIMEOUT
        ).text
    except requests.RequestException as exc:
        print('contact page request failed for %s: %s' % (corporate_name, exc))
        return ''


def main():
    """Scrape up to 99 result pages and append one CSV row per product."""
    num = 0
    # newline='' is what the csv module expects (avoids blank lines between
    # rows on Windows); errors='replace' keeps a character outside GBK from
    # aborting the run. The with-block closes the file on every exit path.
    with open(file_name, mode='a', encoding='gbk', errors='replace', newline='') as f:
        cscwrite = csv.writer(f)
        for i in range(1, 100):
            name_list = fetch_products(i)
            print(len(name_list))
            for name in name_list:
                corporate_name = name.get('fullProviderName')  # company name
                resp = fetch_contact_page(corporate_name, extract_xzhid(name))
                row = build_row(corporate_name, resp)
                print(*row)
                cscwrite.writerow(row)
                sleep(0.05)  # brief pause between shop requests
                num += 1
            print('---第%s页采集完成---' % i)
            if not name_list:
                # An empty page means there are no more results.
                break
    print('---爬取完毕,共获取到%d条数据---' % num)


if __name__ == "__main__":
    main()
# --- Blog-page text captured together with the script (not code) ---
# Python 百度爱采购ajax-获取到公司名称,电话
# 于 2024-10-26 23:38:42 首次发布
# 部署运行你感兴趣的模型镜像
# 您可能感兴趣的与本文相关的镜像
# Python3.10
# Conda
# Python
# Python 是一种高级、解释型、通用的编程语言,以其简洁易读的语法而闻名,适用于广泛的应用,包括Web开发、数据分析、人工智能和自动化脚本
# 3255





