I spent all of yesterday afternoon and evening on this and it was exhausting. The main problem was that I didn't know how to use ImagesPipeline; the specific issues are explained in the code below.
Configure the following in settings.py:
import os

ITEM_PIPELINES = {
    # 'car_bsj.pipelines.CarBsjPipeline': 300,
    'car_bsj.pipelines.CARBSJImagesPipeline': 1,
}
# Directory where the downloaded images are stored
IMAGES_STORE = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'images')
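A quick note on how ImagesPipeline fits together: by default it reads the image URLs from the item's image_urls field, downloads them into IMAGES_STORE, and writes the results back into the item's images field. The class registered above, car_bsj.pipelines.CARBSJImagesPipeline, would normally be a subclass of scrapy.pipelines.images.ImagesPipeline. Here is a minimal sketch of such a subclass (not the exact pipeline for this project; the image_urls field name and the flat file naming are assumptions for illustration):

pipelines.py
import os
from scrapy import Request
from scrapy.pipelines.images import ImagesPipeline

class CARBSJImagesPipeline(ImagesPipeline):
    def get_media_requests(self, item, info):
        # Issue one download Request per URL in the item's image_urls field;
        # the item rides along in meta so file_path could read it if needed.
        return [Request(url, meta={'item': item}) for url in item['image_urls']]

    def file_path(self, request, response=None, info=None):
        # Return the path (relative to IMAGES_STORE) where each image is saved;
        # here we simply keep the original file name taken from the URL.
        return os.path.basename(request.url)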
Straight to the code.
spider.py
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from car_bsj.items import CarBsjItem


class BsjSpider(CrawlSpider):
    name = 'bsj'
    allowed_domains = ['autohome.com.cn']
    start_urls = ['https://car.autohome.com.cn/pic/series/162.html']

    rules = (
        Rule(LinkExtractor(allow=r'.+pic/series/162-.+\.html'),
             callback='parse_item', follow=True),
    )

    def parse_item(self, response):
        box_