使用 Scrapy 的图片管道(ImagesPipeline)之前,必须先安装 Pillow:
$ pip install pillow
在settings.py中添加
# -*- coding: utf-8 -*-
# Scrapy settings for topgoods project
#
# For simplicity, this file contains only the most important settings by
# default. All the other settings are documented here:
#
# http://doc.scrapy.org/en/latest/topics/settings.html
#
# Project identity and where Scrapy looks for (and generates) spiders.
BOT_NAME = 'topgoods'
SPIDER_MODULES = ['topgoods.spiders']
NEWSPIDER_MODULE = 'topgoods.spiders'

# Register the built-in HTTP proxy middleware at order 301.
# Uses the same relocated (Scrapy >= 1.0) module layout as
# 'scrapy.pipelines.images' below; the legacy 'scrapy.contrib.*' path is
# deprecated and is not importable on newer Scrapy releases.
DOWNLOADER_MIDDLEWARES = {
    'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': 301,
}
###########################################################################
# Enable the image-download pipeline.
ITEM_PIPELINES = {'scrapy.pipelines.images.ImagesPipeline': 1}
# 'file_urls' is the item key that holds the image URLs to download.
IMAGES_URLS_FIELD = 'file_urls'
# Storage location; '.' stores the downloaded images under the current folder.
IMAGES_STORE = r'.'
# Optional: thumbnail sizes to generate for each stored image,
# as name -> (width, height). Uncomment to enable.
# IMAGES_THUMBS = {
#     'small': (50, 50),
#     'big': (270, 270),
# }
###########################################################################
# Write the crawl log to this file.
LOG_FILE = "scrapy.log"
只需添加上面两行 ##### 分隔线之间的内容即可。
当然,我们也可以通过 IMAGES_THUMBS 为每张图片额外生成指定尺寸的缩略图:
# IMAGES_THUMBS = {
# 'small': (50, 50),
# 'big': (270, 270),
# }