# Query the Baidu image search server with the requests package, parse the returned JSON, and decode it into the actual image URLs.
import requests
from fake_useragent import UserAgent
# Multi-character tokens that stand in for URL punctuation in an encoded
# objURL. They are expanded before the per-character substitution below.
_OBJURL_TOKENS = (
    ('_z2C$q', ':'),
    ('_z&e3B', '.'),
    ('AzdH3F', '/'),
)

# Per-character substitution table; characters not listed pass through as-is.
_OBJURL_CHAR_TABLE = str.maketrans({
    'w': 'a', 'k': 'b', 'v': 'c', '1': 'd', 'j': 'e', 'u': 'f', '2': 'g',
    'i': 'h', 't': 'i', '3': 'j', 'h': 'k', 's': 'l', '4': 'm', 'g': 'n',
    '5': 'o', 'r': 'p', 'q': 'q', '6': 'r', 'f': 's', 'p': 't', '7': 'u',
    'e': 'v', 'o': 'w', '8': '1', 'd': '2', 'n': '3', '9': '4', 'c': '5',
    'm': '6', '0': '7', 'b': '8', 'l': '9', 'a': '0',
})


def _decode_url(encoded):
    """Decode one encoded ``objURL`` string into a plain URL."""
    for token, plain in _OBJURL_TOKENS:
        encoded = encoded.replace(token, plain)
    return encoded.translate(_OBJURL_CHAR_TABLE)


def imgUrls(keyWord, userAgent, pn=0, rn=30, timeout=10):
    """Fetch one page of image search results and return the decoded URLs.

    Args:
        keyWord: Search term, sent as both ``queryWord`` and ``word``.
        userAgent: Value sent in the User-Agent request header.
        pn: Index of the first image to request.
        rn: Number of images to request.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        A dict with two keys. On success ``status`` is True and ``result`` is
        the list of decoded image URLs. On failure (non-200 response,
        unparsable JSON, unexpected payload shape) ``status`` is False and
        ``result`` is a string describing the problem.

    Raises:
        Exceptions raised by ``requests.get`` itself (connection errors,
        timeouts) are not caught here and propagate to the caller.
    """
    url = 'https://image.baidu.com/search/index'
    params = {
        'tn': 'resultjson_com',
        'ipn': 'rj',
        'ct': '201326592',
        'is': '',
        'fp': 'result',
        'queryWord': keyWord,
        'cl': '2',
        'lm': '-1',
        'ie': 'utf-8',
        'oe': 'utf-8',
        'adpicid': '',
        'st': '-1',
        'z': '',
        'ic': '0',
        'word': keyWord,
        's': '',
        'se': '',
        'tab': '',
        'width': '',
        'height': '',
        'face': '0',
        'istype': '2',
        'qc': '',
        'nc': '1',
        'fr': '',
        'pn': pn,  # index of the first image to request
        'rn': rn,  # how many images to fetch
        'gsm': '1e',
        # NOTE(review): looks like a timestamp-style key copied from a
        # captured request; kept verbatim.
        '1491808945838': '',
    }
    rep = requests.get(
        url,
        headers={'user-Agent': userAgent},
        params=params,
        timeout=timeout,
    )

    # Report a bad HTTP status through the same dict shape as every other
    # outcome instead of implicitly returning None.
    if rep.status_code != 200:
        return {
            'result': 'unexpected HTTP status code: {}'.format(rep.status_code),
            'status': False,
        }

    try:
        entries = rep.json()['data']
        # Skip entries that carry no objURL rather than blindly dropping the
        # last element: a placeholder entry is ignored wherever it appears,
        # and a valid final entry is no longer discarded.
        urls = [
            _decode_url(entry['objURL'])
            for entry in entries
            if entry.get('objURL')
        ]
    except Exception as err:
        # Boundary of the function's contract: parsing problems are reported
        # through the status flag, not raised.
        return {'result': str(err), 'status': False}

    return {'result': urls, 'status': True}
if __name__ == '__main__':
    # Manual smoke run: query a sample keyword using the `random` attribute
    # of a fake_useragent UserAgent as the User-Agent, then print whatever
    # imgUrls returns.
    agent_source = UserAgent()
    outcome = imgUrls(keyWord='美女', userAgent=agent_source.random)
    print(outcome)