import requests
import os

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
    'referer': 'https://image.baidu.com'
}

os.makedirs(r'H:/chenglong1', exist_ok=True)  # make sure the target folder exists
num = 1
for page in range(1, 3):
    # Baidu image search returns JSON; pn is the result offset, rn the page size
    url = "http://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord+=&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=&word=%E6%88%90%E9%BE%99&z=&ic=&s=&se=&tab=&width=&height=&face=&istype=&qc=&nc=1&fr=&step_word=%E6%88%90%E9%BE%99&pn={}&rn=30".format(page * 30)
    resp = requests.get(url, headers=headers)
    resp.encoding = "utf-8"
    data = resp.json()["data"]
    for item in data:  # use a new name instead of reusing the outer loop variable
        num = num + 1
        thumb_url = item.get("thumbURL")  # .get() returns None when the key is missing
        if thumb_url is not None:
            img = requests.get(thumb_url, headers=headers)
            print(img)
            with open(r'H:/chenglong1/' + str(num) + '.jpg', 'wb') as f:
                f.write(img.content)  # the with block closes the file automatically
The second crawl felt very different from the first; in particular, I still haven't fully gotten the hang of handling the JSON response.
The Referer request header tells the server which page the link was clicked from; when scraping image hosts it is essential, as shown in the sketch below.
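A minimal sketch of attaching the Referer header with requests; the thumbnail URL here is just a hypothetical placeholder, not a real image address:

import requests

thumb_url = "https://img1.baidu.com/it/example.jpg"  # hypothetical URL for illustration

headers = {
    "User-Agent": "Mozilla/5.0",
    # Pretend the request was clicked from Baidu's image search page;
    # many image servers reject hotlinked requests that lack this header.
    "Referer": "https://image.baidu.com",
}

resp = requests.get(thumb_url, headers=headers)
print(resp.status_code)

Header names are case-insensitive, so "referer" and "Referer" both work with requests.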
Don't rely only on [] to look up key values: some keys may be missing or empty, and dict.get() handles this problem nicely, as the example below shows.
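A small illustration of [] versus .get() on a dict shaped like one entry of the Baidu "data" list (the keys and values are made up for the example):

item = {"thumbURL": None, "fromPageTitle": "example"}

# item["middleURL"] would raise KeyError because the key is absent;
# .get() returns None (or a chosen default) instead.
print(item.get("middleURL"))             # None
print(item.get("middleURL", "missing"))  # "missing"

# Guard against both a missing key and an explicit null value
url = item.get("thumbURL")
if url is not None:
    print("download", url)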