import os
import time

import requests
from bs4 import BeautifulSoup
# Collect the picture-list URL for every car brand from the left-hand menu page.
def get_car_brand_url(base_url):
    car_brand = 'https://car.autohome.com.cn'
    car_brand_list = []
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(base_url, headers=headers)
    # Autohome serves GB2312-encoded HTML, so decode it explicitly before parsing.
    soup = BeautifulSoup(response.content.decode('gb2312', 'ignore'), 'html.parser')
    url_all = soup.find_all('a')
    for item in url_all:
        href = item.get('href')
        if href:  # skip anchors without an href attribute
            car_brand_list.append(car_brand + href)
    return car_brand_list
# Collect the picture-list URL of every series (model line) under each brand.
def get_car_brand_class_url(car_url_list):
    car_class_base = 'https://car.autohome.com.cn'
    car_class_list = []
    headers = {'User-Agent': 'Mozilla/5.0'}
    for item in car_url_list:
        response = requests.get(item, headers=headers)
        soup = BeautifulSoup(response.content.decode('gb2312', 'ignore'), 'html.parser')
        # Each brand page lists its series inside the 'uibox-con carpic-list02' block.
        container = soup.find('div', {'class': 'uibox-con carpic-list02'})
        if container is None:  # skip pages without a series list
            continue
        for a in container.find_all('a'):
            car_class_list.append(car_class_base + a.get('href'))
    return car_class_list
# For every series, follow the first link in the picture-category list
# ('search-pic-sortul') to reach the page that holds the image grid.
def get_brand_class_image_url(car_class_list):
    car_image_url = []
    car_base = 'https://car.autohome.com.cn'
    headers = {'User-Agent': 'Mozilla/5.0'}
    for item in car_class_list:
        response = requests.get(item, headers=headers)
        soup = BeautifulSoup(response.content.decode('gb2312', 'ignore'), 'html.parser')
        sort_list = soup.find('ul', {'class': 'search-pic-sortul'})
        if sort_list is None:  # skip series pages without a picture-category list
            continue
        car_image_url.append(car_base + sort_list.find('a').get('href'))
    return car_image_url
# Download every image on each picture page into folder_path.
def download_image(car_image_url, folder_path):
    if not os.path.exists(folder_path):  # create the target folder if it does not exist yet
        os.makedirs(folder_path)
    headers = {'User-Agent': 'Mozilla/5.0'}
    index = 0  # running counter across all pages, so files from different series do not overwrite each other
    for item in car_image_url:
        response = requests.get(item, headers=headers)
        soup = BeautifulSoup(response.content.decode('gb2312', 'ignore'), 'html.parser')
        gallery = soup.find('div', {'class': 'uibox-con carpic-list03 border-b-solid'})
        if gallery is None:  # skip pages without an image grid
            continue
        for img in gallery.find_all('img'):
            image_url = 'http:' + img.get('src')  # src values are protocol-relative ('//...')
            img_name = os.path.join(folder_path, '{}.jpg'.format(index))
            image = requests.get(image_url)
            with open(img_name, 'wb') as file:  # write the image bytes to disk
                file.write(image.content)
            print('Downloaded image %d' % index)
            index += 1
# Entry point: the As_LeftListNew page lists every brand in the left-hand menu.
base_url = 'http://car.autohome.com.cn/AsLeftMenu/As_LeftListNew.ashx?typeId=2%20&brandId=0%20&fctId=0%20&seriesId=0'
image_url_list = get_car_brand_url(base_url)
car_class_list = get_car_brand_class_url(image_url_list)
# For a quick test, crawl a single series instead of every brand:
# car_class_list = get_car_brand_class_url(['https://car.autohome.com.cn/pic/series/4482.html#pvareaid=2042214'])
car_image_url = get_brand_class_image_url(car_class_list)
folder_path = './car_images'
download_image(car_image_url, folder_path)
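# Note: the script above fires requests back to back and assumes every page parses
# cleanly; Autohome may throttle or block such traffic. The helper below is a minimal
# sketch of a politer fetch with a timeout, a retry, and a short delay: the name
# fetch_soup, the retry count, and the delay value are assumptions of this sketch,
# not part of the original script. Each requests.get / BeautifulSoup pair above could
# be replaced by a fetch_soup(url) call, with a None check before using the result.
def fetch_soup(url, retries=3, delay=1.0):
    headers = {'User-Agent': 'Mozilla/5.0'}
    for _ in range(retries):
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            # Autohome pages are GB2312-encoded, matching the decoding used above.
            return BeautifulSoup(response.content.decode('gb2312', 'ignore'), 'html.parser')
        except requests.RequestException:
            time.sleep(delay)  # back off briefly before retrying
    return None  # callers should skip pages that keep failing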