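# For a detailed walkthrough see https://blog.youkuaiyun.com/qq_42488087/article/details/95041662 - the author explains the steps very clearly, and this code can be read alongside that article.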
import requests
from urllib.parse import urlencode
import os
from hashlib import md5
def get_html(keyword="街拍", offset=20, timeout=10):
    """Fetch one page of Toutiao search results and return the parsed JSON.

    Args:
        keyword: Search term sent as the ``keyword`` query parameter.
        offset: Value sent as the ``offset`` query parameter
            (presumably the paging offset - confirm against the API).
        timeout: Seconds passed to ``requests`` as the request timeout, so a
            stalled connection raises instead of blocking forever.

    Returns:
        The decoded JSON body when the server answers with HTTP 200,
        otherwise ``None``.
    """
    params = {
        "aid": 24,
        "app_name": "web_search",
        "offset": offset,
        "format": "json",
        "keyword": keyword,
        "autoload": "True",
        "count": 20,
        "en_qc": 1,
        "cur_tab": 1,
        "from": "search_tab",
        "pd": "synthesis",
    }
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36",
        "cookie": "tt_webid=6720753355788125704; WEATHER_CITY=%E5%8C%97%E4%BA%AC; tt_webid=6720753355788125704; csrftoken=646477858a679be0057ab5066c08d20f; UM_distinctid=16c5530fc5d2ea-0d78efed035bcd-c343162-100200-16c5530fc60520; s_v_web_id=ac25b6c038c890851e87d96b0612296b; __tasessionId=ba7ul3ziv1564823494399; CNZZDATA1259612802=1690896445-1564792381-https%253A%252F%252Fwww.toutiao.com%252F%7C1564819381",
    }
    # urlencode() turns the params dict into a single "&"-joined query string.
    url = "https://www.toutiao.com/api/search/content/?" + urlencode(params)
    print(url)
    # Use the session as a context manager so its connections are released
    # even if the request raises.
    with requests.Session() as session:
        response = session.get(url, headers=headers, timeout=timeout)
        if response.status_code == 200:
            return response.json()
    # Any non-200 answer yields None; callers must check for it.
    return None
def _safe_dirname(title):
    """Return *title* with path separators and reserved characters replaced by "_"."""
    return title.translate(str.maketrans({ch: '_' for ch in '\\/:*?"<>|'}))


def save_img():
    """Download the images of every titled search result into per-title folders.

    Calls ``get_html()`` for the search results. For each entry that has a
    non-empty ``title``, a directory named after the (sanitized) title is
    created in the current working directory and every URL found in the
    entry's ``image_list`` is downloaded into it. Files are named after the
    MD5 digest of their content, so the same image is never written twice.

    Returns:
        None.
    """
    html = get_html()
    # get_html() returns None on a non-200 reply, and "data" may be absent
    # or null; treat all of these as "nothing to download".
    entries = (html or {}).get('data') or []
    if not entries:
        print('no search results to download')
        return

    for entry in entries:
        title = entry.get('title')
        # Skip entries without a usable title: an empty string cannot be
        # used as a directory name.
        if not title:
            continue
        folder = _safe_dirname(title)
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(folder, exist_ok=True)
        # "image_list" can be missing or null; iterate an empty list then.
        for image in entry.get('image_list') or []:
            image_url = image.get('url')
            if not image_url:
                continue
            response = requests.get(image_url, timeout=10)
            # Do not store an error page as if it were a picture.
            if response.status_code != 200:
                continue
            # Name each file after the MD5 hash of its content.
            file_path = '{0}/{1}.{2}'.format(folder, md5(response.content).hexdigest(), 'jpg')
            if not os.path.exists(file_path):
                with open(file_path, 'wb') as f:
                    f.write(response.content)
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    save_img()