# -*- coding: utf-8 -*-
"""Spider entry point: collect image URLs from the meizitu.com listing pages."""
import scrapy

from scrapypc.items import ScrapypcItem


class AppSpider(scrapy.Spider):
    """Crawl the ``more_<n>.html`` listing pages and emit their image URLs.

    Each parsed page produces one ``ScrapypcItem`` whose ``image_name`` field
    holds the list of image URLs found on that page.
    """

    name = 'app'
    allowed_domains = ['meizitu.com']
    start_urls = ['http://www.meizitu.com/a/more_1.html']

    def parse(self, response):
        """Yield one item with the page's image URLs, then the paging requests.

        Args:
            response: the downloaded listing page.

        Yields:
            A ``ScrapypcItem`` with ``image_name`` set to a list of absolute
            image URLs, followed by a ``scrapy.Request`` for each of the
            listing pages 2 through 72, all handled by this same callback.
        """
        item = ScrapypcItem()
        # Resolve every ``src`` against the page URL: the images pipeline
        # downloads the values verbatim, so relative links would be unusable.
        item['image_name'] = [
            response.urljoin(src)
            for src in response.xpath('//img//@src').extract()
        ]
        yield item

        # Build the pagination requests (the page range is hard-coded).
        # These are re-yielded from every page; Scrapy's default duplicate
        # filter is relied upon to drop the repeats.
        for page in range(2, 73):
            next_url = "http://www.meizitu.com/a/" + "more_%d.html" % page
            yield scrapy.Request(next_url, callback=self.parse)
# Settings module configuration
import os

# Item pipelines to run, mapped to their order value.
ITEM_PIPELINES = {
    # The project's own pipeline is left disabled:
    # 'scrapypc.pipelines.ScrapypcPipeline': 300,
    'scrapy.pipelines.images.ImagesPipeline': 1,
}

# Name of the Item field that holds the list of image URLs to download.
IMAGES_URLS_FIELD = 'image_name'

# Store downloaded images in an "images" directory located next to this file.
project_dir = os.path.abspath(os.path.dirname(__file__))
IMAGES_STORE = os.path.join(project_dir, 'images')