import json
import os
import re
from _md5 import md5
import random
import user_agent
import requests
from json import loads
# Module-level accumulator of image URLs; parse_url() appends to it and then
# downloads every URL it contains.
image_list = []
def get_url():
    """Fetch one page of Toutiao gallery search results and collect article links.

    Returns:
        list: The ``article_url`` value of every search result that has one.
        The list is empty when the server does not answer with HTTP 200 or
        when the payload carries no ``data`` entries, so callers can always
        iterate over the result.

    Raises:
        requests.RequestException: On connection errors or timeouts.
        ValueError: If the response body is not valid JSON.
    """
    # The keyword parameter is the URL-encoded search term; offset/count select
    # the first 20 results of the gallery tab.
    url = 'https://www.toutiao.com/search_content/?offset=0&format=json&keyword=%E8%A1%97%E6%8B%8D&autoload=true&count=20&cur_tab=3&from=gallery&pd='
    # A timeout keeps the script from hanging forever on a stalled connection.
    result = requests.get(url, timeout=10)
    if result.status_code != 200:
        return []

    info = loads(result.text)
    # 'data' is a list of dicts, one per search result; it may be missing or
    # null, in which case there is nothing to collect.
    entries = info.get('data') or []
    # Not every result entry is an article, so entries without an
    # 'article_url' key are skipped instead of raising KeyError.
    return [
        entry['article_url']
        for entry in entries
        if isinstance(entry, dict) and entry.get('article_url')
    ]
# Request headers used when fetching article pages. The User-Agent is generated
# by the imported ``user_agent`` package so the server sees a browser-like
# value instead of the placeholder text 'user_agent'.
header = {'user-agent': user_agent.generate_user_agent()}
def parse_url(url, save_dir=r'E:\python\练习\python_try\image'):
    """Extract one image URL per article page and download the images.

    Each article page is fetched with the module-level ``header``. The first
    escaped ``url`` field found inside the page's ``JSON.parse("...")`` payload
    is unescaped and appended to the module-level ``image_list``. Afterwards
    every URL in ``image_list`` is downloaded and stored as
    ``<md5 of content>.jpg`` inside ``save_dir``; files that already exist are
    left untouched.

    Args:
        url: Iterable of article page URLs (``None`` is treated as empty).
        save_dir: Directory that receives the images. Defaults to the
            directory this script has always written to; it is created when
            it does not exist yet.

    Raises:
        requests.RequestException: On connection errors or timeouts.
        OSError: If the directory or an image file cannot be written.
    """
    payload_pattern = re.compile(r'JSON.parse\("(.*?)"\),')
    image_pattern = re.compile(r'url\\":\\"(.*?)\\"', re.S)

    for article in url or ():
        page = requests.get(article, headers=header, timeout=10)
        payload = payload_pattern.search(page.text)
        if payload is None:
            # Page layout not recognised: skip this article only, rather than
            # abandoning all remaining articles and downloads.
            continue
        image_info = image_pattern.search(payload.group(1))
        if image_info is None:
            continue
        # The URL is embedded in an escaped JSON string; drop the backslashes.
        image_list.append(image_info.group(1).replace('\\', ''))

    os.makedirs(save_dir, exist_ok=True)
    for image_url in image_list:
        image = requests.get(image_url, timeout=10)
        if image.status_code != 200:
            # Do not store an error page under a .jpg name.
            continue
        # Naming the file after the content hash de-duplicates identical images.
        file_name = '{0}.{1}'.format(md5(image.content).hexdigest(), 'jpg')
        file_path = os.path.join(save_dir, file_name)
        if not os.path.exists(file_path):
            with open(file_path, 'wb') as f:
                f.write(image.content)
            print('正在下载。。。', image_url)
if __name__ == '__main__':
    # Script entry point: collect the article links, then download their images.
    parse_url(get_url())