立即学习:https://edu.youkuaiyun.com/course/play/24797/282246?utm_source=blogtoedu
# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html

import scrapy


class ZhipinspiderItem(scrapy.Item):
    """Container for a single scraped car-purchase record.

    Each attribute below is a plain ``scrapy.Field`` with no metadata; the
    spider fills the fields by key (e.g. ``item['design']``).
    """

    design = scrapy.Field()          # style / model variant
    buy_date = scrapy.Field()        # purchase date
    buy_addr = scrapy.Field()        # purchase address
    real_price = scrapy.Field()      # bare-car price
    original_price = scrapy.Field()  # guide price
# -*- coding: utf-8 -*-
import scrapy

from ZhipinSpider.items import ZhipinspiderItem


# Listing pages run from 1 to 61, e.g.
# http://luochejia.yiche.com/yiqifengtianrav4/price/?page=1
class TestScrapySpider(scrapy.Spider):
    """Scrape bare-car price records from the luochejia.yiche.com listing.

    All 61 listing pages are queued up front through ``start_urls``;
    ``parse`` then yields one ``ZhipinspiderItem`` for every
    ``div.price-list-box`` element found on a page.
    """

    # Name of the spider.
    name = 'test_scrapy'
    # Domains the spider is allowed to crawl.
    allowed_domains = ['luochejia.yiche.com']
    # Pages the crawl starts from (page=1 .. page=61).
    urls = ['http://luochejia.yiche.com/yiqifengtianrav4/price/?page=%s' % i
            for i in range(1, 62)]
    start_urls = urls

    def parse(self, response):
        """Yield one item per price record on a downloaded listing page.

        Args:
            response: The response the Scrapy downloader obtained for one of
                the ``start_urls``.

        Yields:
            ZhipinspiderItem: One item per ``div.price-list-box`` element.
            Fields whose XPath matches nothing are set to ``None``.
        """
        # Each price-list-box element holds one purchase record.
        for car_primary in response.xpath('//div[@class="price-list-box"]'):
            item = ZhipinspiderItem()

            # Style / model variant.
            item['design'] = car_primary.xpath(
                './div[@class="con-box"]/div[@class="tit"]/text()').extract_first()

            # The purchase date and the address both come from the same
            # "other" paragraph.  Taking extract_first() for each of them (as
            # this code used to do) stores the same text in both fields, so
            # read every direct text node and pick them apart by position.
            # NOTE(review): assumes the date and the address are separate text
            # nodes of that paragraph, in that order -- confirm against the
            # live markup.
            stripped = (
                text.strip()
                for text in car_primary.xpath(
                    './div[@class="con-box"]/p[@class="other"]/text()').extract()
            )
            other_parts = [text for text in stripped if text]
            buy_date = other_parts[0] if other_parts else None
            # With a single text node there is nothing to separate, so both
            # fields keep that text, as they did before.
            buy_addr = other_parts[1] if len(other_parts) > 1 else buy_date
            item['buy_date'] = buy_date
            item['buy_addr'] = buy_addr

            # Bare-car price.
            item['real_price'] = car_primary.xpath(
                './div[@class="con-box"]/div[@class="price"]'
                '/p[@class="luochejia"]/em/text()').extract_first()
            # Guide price.
            item['original_price'] = car_primary.xpath(
                './div[@class="con-box"]/div[@class="price"]'
                '/p[@class="zhidaojia"]/text()').extract_first()

            yield item