# Python + Ajax: scraping street-snap (街拍) images from Toutiao (今日头条)
from urllib.parse import urlencode,quote
import urllib.request
import requests
import os
from hashlib import md5
def get_page(num, keyword='街拍', timeout=10):
    """Fetch one page of Toutiao search results as parsed JSON.

    Args:
        num: Value sent as the ``offset`` query parameter.
        keyword: Search keyword; defaults to the original hard-coded '街拍'.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body when the server answers with HTTP 200,
        otherwise ``None`` (network failure, timeout, non-200 status, or a
        body that is not valid JSON).
    """
    base_url = 'https://www.toutiao.com/search_content/?'
    params = {
        'offset': num,
        'format': 'json',
        'keyword': keyword,
        'autoload': 'true',
        'count': '20',
        'cur_tab': '1',
        'from': 'search_tab',
    }
    url = base_url + urlencode(params)
    try:
        # Without a timeout requests may block forever on a stalled server.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # RequestException is the base class, so this also covers the
        # ConnectionError the original handled, plus timeouts and the like.
        return None
    if response.status_code != 200:
        return None
    try:
        return response.json()
    except ValueError:
        # The body was not valid JSON (e.g. an HTML error/anti-bot page).
        return None
def get_image(json):
    """Yield one record per image found in a search-result payload.

    Args:
        json: Parsed response dict as returned by ``get_page``; may be
            ``None`` or empty when the page could not be fetched.

    Yields:
        Dicts of the form ``{'image': <url>, 'title': <title>}``, one for
        every entry in each item's ``image_list``.
    """
    # get_page() returns None on failure; treat that as "no results" rather
    # than crashing on None.get(...).
    if not json:
        return
    for item in json.get('data') or []:
        title = item.get('title')
        # Some result items carry no 'image_list' key at all; skip them
        # instead of iterating over None.
        for image in item.get('image_list') or []:
            yield {
                'image': image.get('url'),
                'title': title,
            }
def save_image(item, timeout=10):
    """Download one image into a directory named after its title.

    The file is stored as ``<title>/<md5 of content>.jpg``; naming by
    content hash means an image that was already saved is not written twice.

    Args:
        item: Dict with an ``'image'`` URL and a ``'title'`` string, as
            yielded by ``get_image``.
        timeout: Seconds to wait for the image server before giving up.

    Returns:
        None. Progress and failures are reported via ``print``.
    """
    title = item.get('title')
    url = item.get('image')
    # Records without a title or URL cannot be saved; bail out before
    # touching the filesystem or the network.
    if not title or not url:
        print('Fail to save image!')
        return

    # Titles are free text: replace characters that are path separators or
    # illegal in file names on common platforms so directory creation works.
    folder = title.translate(str.maketrans({c: '_' for c in '\\/:*?"<>|'})).strip()
    if not folder:
        print('Fail to save image!')
        return
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(folder, exist_ok=True)

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Covers connection errors, timeouts and malformed URLs.
        print('Fail to save image!')
        return
    if response.status_code != 200:
        return

    filepath = os.path.join(folder, '{0}.jpg'.format(md5(response.content).hexdigest()))
    if os.path.exists(filepath):
        print('Already Download!')
        return
    with open(filepath, 'wb') as fh:
        fh.write(response.content)
if __name__ == '__main__':
    # Walk result pages at offsets 20, 40, ..., 400 and save every image found.
    for i in range(1, 21):
        page = get_page(i * 20)
        # get_page() returns None when the request fails; skip that page
        # instead of letting one bad response abort the whole crawl.
        if not page:
            continue
        for item in get_image(page):
            print(item)
            save_image(item)