创建spider文件Bookchina.py:
import scrapy
from scrapy import Request
from MyScrapy.items import BookChina
class BookchinaSpider(scrapy.Spider):
name = 'Bookchina'
#allowed_domains = ['blog.jobbole.com/114261/']
start_urls = ['http://www.bookschina.com/kinder/54290000/']
def parse(self, response):
    """Yield one follow-up request per cover link found in ``response``.

    This is the callback Scrapy invokes by default for the responses to
    ``start_urls``. Every ``href`` of an anchor inside a ``.cover`` element
    is turned into a ``Request`` whose callback is ``self.bookDetail``.

    Args:
        response: The downloaded page to scan for links.

    Yields:
        scrapy.Request: One request per extracted link.
    """
    for href in response.css('.cover a::attr(href)').extract():
        # Resolve the href against the URL of the page it came from rather
        # than prepending a hard-coded host: root-relative hrefs produce the
        # same URL as before, while absolute, protocol-relative and
        # slash-less relative hrefs no longer yield a malformed link.
        yield Request(url=response.urljoin(href), callback=self.bookDetail)
def categoryParse(self, response):
    """Yield one follow-up request per cover link found in ``response``.

    Collects the ``href`` of every anchor inside a ``.cover`` element and
    schedules a ``Request`` for it, handled by ``self.bookDetail``.
    NOTE(review): nothing in the visible part of the file registers this
    method as a callback -- confirm it is still used.

    Args:
        response: The downloaded page to scan for links.

    Yields:
        scrapy.Request: One request per extracted link.
    """
    cover_hrefs = response.css('.cover a::attr(href)').extract()
    for href in cover_hrefs:
        # urljoin resolves the href relative to the page URL, so links that
        # are already absolute (or lack a leading slash) are not corrupted
        # by blind concatenation with a fixed host string.
        target_url = response.urljoin(href)
        yield Request(url=target_url, callback=self.bookDetail)
def bookDetail(self,response):
bookName = response.css('.padLeft10 h1::text').extract_first('')
print(bookName)
bookEditer = response.css('.