import requests
import os
import zipfile
'''
爬取百度图片
'''
# Global counter of images saved so far; used by save_imgs() to build
# unique, monotonically numbered file names across pages.
imgnum = 0
def get_url(starpagenum, keyword):
    """Query the Baidu image-search JSON API and yield thumbnail records.

    Args:
        starpagenum: zero-based page index; each page holds 30 results.
        keyword: search term, sent as both 'queryWord' and 'word'.

    Yields:
        dict with keys:
            'url'  -- thumbnail URL (may be None if Baidu omits it),
            'name' -- characters 1..9 of the source-page title ('' if absent).
    """
    params = {
        'tn': 'resultjson_com',
        'ipn': 'rj',
        'ct': '201326592',
        'is': '',
        'fp': 'result',
        'cl': '2',
        'lm': '-1',
        'ie': 'utf-8',
        'oe': 'utf-8',
        'adpicid': '',
        'st': '-1',
        'z': '',
        'ic': '0',  # BUG FIX: original value ' 0' carried a stray leading space
        'queryWord': keyword,
        'word': keyword,
        's': '',
        'se': '',
        'tab': '',
        'width': '',
        'height': '',
        'face': '0',
        'istype': '2',
        'qc': '',
        'nc': '1',
        'fr': '',
        # BUG FIX: the result offset is page_index * page_size. The original
        # 'starpagenum + 30' made every page in range(0, 10) fetch offsets
        # 30..39, i.e. (almost) the same 30 images ten times over.
        'pn': starpagenum * 30,
        'rn': '30',
    }
    url = 'https://image.baidu.com/search/acjson'
    response = requests.get(url, params=params, timeout=10)
    response.encoding = "utf-8"
    data = response.json().get('data')
    if not data:  # robustness: 'data' can be missing/None on API errors
        return
    for item in data:
        if not item:  # Baidu appends a trailing empty dict as a sentinel
            return
        # robustness: 'fromPageTitleEnc' may be absent; original code
        # crashed slicing None
        title = item.get('fromPageTitleEnc') or ''
        yield {
            'url': item.get('thumbURL'),
            'name': title[1:10],
        }
def save_imgs(imgs):
    """Download every image record in *imgs* to the local image directory.

    Args:
        imgs: iterable of dicts with 'url' and 'name' keys (see get_url()).

    Side effects:
        Increments the module-level counter ``imgnum`` and writes one
        numbered .jpg file per image under *filepath*.
    """
    filepath = r"E:\python\pythonzong\爬虫\request\imgbaidutu\\"
    # Robustness: original crashed with FileNotFoundError when the target
    # directory did not exist yet.
    os.makedirs(filepath, exist_ok=True)
    global imgnum
    for item in imgs:
        url = item.get('url')
        if not url:  # robustness: get_url may yield entries without a thumbURL
            continue
        # Strip whitespace and '*' -- illegal/awkward in Windows file names.
        name = ''.join(item.get('name').split()).replace('*', '')
        imgnum += 1
        filename = filepath + str(imgnum) + name + ".jpg"
        print(filename)
        response = requests.get(url, timeout=10)
        with open(filename, 'wb') as file:
            file.write(response.content)
def save_zip(newpath, path):
    """Recursively compress every file under *path* into a zip at *newpath*.

    Args:
        newpath: destination .zip file path.
        path: root directory whose files are archived.
    """
    print("开始压缩文件")
    with zipfile.ZipFile(newpath, 'w', zipfile.ZIP_DEFLATED) as zp:
        # os.walk yields (dirpath, dirnames, filenames). The original loop
        # named the first element 'path', shadowing the parameter.
        for dirpath, _dirnames, filenames in os.walk(path):
            for name in filenames:
                fullname = os.path.join(dirpath, name)
                # BUG FIX: without an explicit arcname the archive embedded
                # the absolute E:\... directory structure; store entries
                # relative to the root directory instead.
                zp.write(fullname, arcname=os.path.relpath(fullname, path))
    print("压缩文件成功")
if __name__ == '__main__':
    # The keyword and the number of pages below can be changed as needed.
    # (This note was a bare free-text line at the end of the file, which
    # was a SyntaxError; it is preserved here as a comment.)
    keyword = "美女"
    for num in range(0, 10):  # fetch 10 pages of 30 images each
        save_imgs(get_url(num, keyword))
    path = r"E:\python\pythonzong\爬虫\request\imgbaidutu"
    zippath = "E:\\python\\pythonzong\\爬虫\\request\\" + keyword + ".zip"
    save_zip(zippath, path)