用Python3爬取今日头条图片

最新推荐文章于 2025-04-21 11:40:36 发布

LiebeZQ

最新推荐文章于 2025-04-21 11:40:36 发布

阅读量214

点赞数

分类专栏： Python 文章标签：爬虫

本文链接：https://blog.youkuaiyun.com/LiebeZQ/article/details/98370452

版权

Python 专栏收录该内容

6 篇文章

订阅专栏

详细步骤请看 https://blog.youkuaiyun.com/qq_42488087/article/details/95041662 ，这位兄弟解释得非常清楚，代码可结合着看。


import requests
from urllib.parse import urlencode
import os
from hashlib import md5

def get_html(offset=20, keyword="街拍", timeout=10):
    """Fetch one page of Toutiao search results and return the parsed JSON.

    Args:
        offset: Value sent as the ``offset`` query parameter (defaults to
            the originally hard-coded 20).
        keyword: Search keyword sent as the ``keyword`` query parameter.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The decoded JSON body when the server answers with HTTP 200,
        otherwise ``None``.

    Raises:
        requests.RequestException: On connection failure or timeout.
        ValueError: If a 200 response body is not valid JSON.
    """
    params = {
        "aid": 24,
        "app_name": "web_search",
        "offset": offset,
        "format": "json",
        "keyword": keyword,
        "autoload": "True",
        "count": 20,
        "en_qc": 1,
        "cur_tab": 1,
        "from": "search_tab",
        "pd": "synthesis",
    }

    # NOTE(review): the cookie is a hard-coded value that looks like it was
    # captured from a browser session; it has probably expired -- confirm and
    # refresh before relying on the results.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36",
        "cookie": "tt_webid=6720753355788125704; WEATHER_CITY=%E5%8C%97%E4%BA%AC; tt_webid=6720753355788125704; csrftoken=646477858a679be0057ab5066c08d20f; UM_distinctid=16c5530fc5d2ea-0d78efed035bcd-c343162-100200-16c5530fc60520; s_v_web_id=ac25b6c038c890851e87d96b0612296b; __tasessionId=ba7ul3ziv1564823494399; CNZZDATA1259612802=1690896445-1564792381-https%253A%252F%252Fwww.toutiao.com%252F%7C1564819381",
    }

    # urlencode() turns the params dict into a single '&'-joined query string.
    url = "https://www.toutiao.com/api/search/content/?" + urlencode(params)
    print(url)

    # Use the session as a context manager so its connection pool is released.
    with requests.Session() as session:
        response = session.get(url, headers=headers, timeout=timeout)
        if response.status_code == 200:
            return response.json()
    return None


def _safe_dirname(title):
    """Return *title* with characters that are illegal in directory names replaced by '_'."""
    # Path separators would create nested paths; the rest are rejected by Windows.
    cleaned = ''.join('_' if ch in '\\/:*?"<>|' else ch for ch in title)
    return cleaned.strip()


def save_img():
    """Download every image from the search results into per-title folders.

    Calls ``get_html()`` and, for each result entry that has a title, creates
    a directory named after the (sanitized) title in the current working
    directory and saves each image of the entry's ``image_list`` there. Files
    are named by the MD5 hex digest of their content, so an image that was
    already downloaded is not written twice.

    Entries without a title or images, images without a URL, and images whose
    download fails or does not return HTTP 200 are skipped.

    Returns:
        None.
    """
    html = get_html()
    # get_html() returns None on a non-200 answer, and 'data' can be missing
    # or null in the payload; treat all of these as "no results".
    datas = (html or {}).get('data') or []
    for data in datas:
        image_title = data.get('title')
        if not image_title:
            continue
        image_dir = _safe_dirname(image_title)
        if not image_dir:
            continue
        # Some titled entries carry no image list at all.
        image_list_url = data.get('image_list') or []
        if not image_list_url:
            continue
        os.makedirs(image_dir, exist_ok=True)
        for image_url in image_list_url:
            url = image_url.get('url')
            if not url:
                continue
            try:
                response = requests.get(url, timeout=10)
            except requests.RequestException as exc:
                # Best effort: one broken image must not abort the whole run.
                print('skip {0}: {1}'.format(url, exc))
                continue
            if response.status_code != 200:
                # Avoid saving an error page under a .jpg name.
                continue
            # Name the file after the hash of its content to de-duplicate.
            file_name = '{0}.{1}'.format(md5(response.content).hexdigest(), 'jpg')
            file_path = os.path.join(image_dir, file_name)

            if not os.path.exists(file_path):
                with open(file_path, 'wb') as f:
                    f.write(response.content)


# Run the scraper. save_img() returns None, so it must be called only once;
# the previous `save_img()()` raised TypeError after the downloads finished.
save_img()