9. Scrapy 模板

items.py

import scrapy

class PachongScrapyItem(scrapy.Item):
    """Container for one page's worth of scraped Dangdang product data.

    Each field holds a list; index i of every list refers to the
    same product on the page.
    """
    title = scrapy.Field()       # product titles
    title_link = scrapy.Field()  # links to the product detail pages
    comment = scrapy.Field()     # review-count text for each product

spider/dd.py

import scrapy
from pachong_scrapy.items import PachongScrapyItem
from scrapy.http import Request

class DdSpider(scrapy.Spider):
    """Crawl a Dangdang category listing (pages 1 through ``max_page``).

    Yields one PachongScrapyItem per listing page; each item field is the
    list of values extracted from that page (titles, links, review counts).
    """
    name = 'dd'
    allowed_domains = ['dangdang.com']
    start_urls = ['http://category.dangdang.com/pg1-cid10010337.html']  # page 1
    max_page = 80  # last listing page to request (was hard-coded in the loop)

    def parse(self, response):
        """Extract the per-page product lists and schedule pagination.

        The three XPath extractions produce parallel lists: index i of
        each list refers to the same product.
        """
        item = PachongScrapyItem()
        item["title"] = response.xpath("//a[@name='itemlist-title']/@title").extract()
        item["title_link"] = response.xpath("//a[@name='itemlist-title']/@href").extract()
        item["comment"] = response.xpath("//a[@name='itemlist-review']/text()").extract()
        yield item

        # BUG FIX: the original yielded pages 2..80 from *every* parsed
        # response, so each of the 80 pages re-scheduled 79 requests that
        # only Scrapy's duplicate filter suppressed. Schedule the
        # pagination requests once, from the first page only.
        if response.url == self.start_urls[0]:
            for page in range(2, self.max_page + 1):
                url = 'http://category.dangdang.com/pg' + str(page) + '-cid10010337.html'
                yield Request(url, callback=self.parse)  # callback parses each page

pipelines.py

class PachongScrapyPipeline(object):
    """Pipeline that prints each scraped product as 'title:link:comment'."""

    def process_item(self, item, spider):
        """Print one line per product and return the item unchanged.

        The item's 'title', 'title_link' and 'comment' fields are parallel
        lists. zip() pairs them up cleanly; any unmatched tail (lists of
        different lengths) is skipped — the original indexed by
        range(len(title)) and printed an IndexError per missing entry
        instead of reporting anything useful.
        """
        for title, link, comment in zip(item['title'], item['title_link'], item['comment']):
            print('{}:{}:{}'.format(title, link, comment))
        return item
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值