在Scrapy中,向Request的回调函数传递额外参数通常有两种方法:一种是使用meta进行参数传递,另一种是使用lambda进行参数传递。
方法一:
使用meta进行参数传递。
举例如下:
from scrapy.spiders import Spider
import scrapy
import FirmCrawler.items as MI
from sets import Set
import time
import urlparse
class RicohSpider(Spider):
    """Example spider that hands per-row values to a callback through ``meta``.

    ``parse`` walks the data rows of the first download table and requests
    each row's linked page, attaching the row's values to the request's
    ``meta`` dict.  ``parse_page`` reads them back from ``response.meta``.
    """

    name = "ricoh"
    # Scrapy's offsite filtering reads ``allowed_domains`` (plural); the
    # singular spelling is not a recognised attribute, so it had no effect.
    allowed_domains = ["www.ricoh-imaging.com.cn"]
    # Old (misspelled) name kept as an alias for any code still reading it.
    allowed_domain = allowed_domains
    start_urls = [
        "http://www.ricoh-imaging.com.cn/ricoh/service_download.html"
    ]
    allsuffix = Set()
    timeout = 20
    trytimes = 3
    # Base URL against which the hrefs found in the table are resolved.
    headurl = "http://www.ricoh-imaging.com.cn/"

    def parse(self, response):
        """Yield one request per data row, carrying the row's values in ``meta``.

        Rows without a link in the second cell are skipped; a missing text
        cell is passed on as an empty string instead of raising IndexError.
        """
        # position()>1 skips the first row of the table (presumably the header).
        tr_list_1 = response.xpath(".//*[@id='content2']/div[4]/table[1]/tr[position()>1]")
        for tr in tr_list_1:
            hrefs = tr.xpath("./td[2]/a/@href").extract()
            if not hrefs:
                # No link in this row, so there is no page to request.
                continue
            url = urlparse.urljoin(self.headurl, hrefs[-1])
            # ``(matches or [""])[-1]`` is the last match, or "" when there is none.
            prductVersion = (tr.xpath("./td[3]/text()").extract() or [""])[-1]
            filename = (tr.xpath("./td[1]/text()").extract() or [""])[-1]
            print(filename)
            desc = (tr.xpath("./td[2]/a/text()").extract() or [""])[-1]
            # The meta dict travels with the request and is available again as
            # response.meta inside the callback.
            request = scrapy.FormRequest(url, callback=self.parse_page,
                                         meta={'filename': filename, 'productVersion': prductVersion, 'desc': desc})
            yield request

    def parse_page(self, response):
        """Print the response URL and the values that ``parse`` put into ``meta``."""
        print(response.url)
        # NOTE(review): "tbody" is often added by browsers and may be absent
        # from the raw HTML the spider receives -- confirm this XPath matches.
        publish_Time = response.xpath(".//*[@id='content2']/div[4]/div[3]/table/tbody/tr[4]/td[2]/p/span[1]/text()").extract()
        # Falls back to "" when the page has no such node; the value is not
        # used further in this example.
        publishTime = publish_Time.pop() if publish_Time else ""
        print("filename:" + response.meta['filename'])
        print("productVersion:" + response.meta['productVersion'])
        print("desc:" + response.meta['desc'])
主要方法:在创建scrapy.FormRequest时添加meta参数,meta以字典的形式给出。
# Attach the values for the callback to the request as a dict via the meta argument.
request = scrapy.FormRequest(url, callback=self.parse_page, meta = {'filename':filename,'productVersion':prductVersion,'desc':desc})
yield request
在回调函数中,通过response.meta['key']取出meta字典中对应的值:
# Inside the callback, each value is read back with response.meta['key'],
# using the same keys that were put into the request's meta dict.
print "filename:"+response.meta['filename']
print "productVersion:" + response.meta['productVersion']
print "desc:" + response.meta['desc']
方法二:
使用lambda函数进行参数传递。
举例如下:
def parse(self, response):
    """Yield one request per data row, passing the row's values via a lambda.

    Instead of ``meta``, the row's model, version and link text are bound as
    default arguments of a ``lambda`` callback, which forwards them to
    ``parse_page`` as extra positional arguments.  Rows without a link are
    skipped; a missing text cell is passed on as an empty string.
    """
    # position()>1 skips the first row of each table (presumably the header).
    tr_list_1 = response.xpath(".//*[@id='content2']/div[4]/table[1]/tr[position()>1]")
    tr_list_2 = response.xpath(".//*[@id='content2']/div[4]/table[2]/tr[position()>1]")
    tr_list = tr_list_1 + tr_list_2
    for tr in tr_list:
        hrefs = tr.xpath("./td[2]/a/@href").extract()
        if not hrefs:
            # No link in this row, so there is no page to request.
            continue
        url = urlparse.urljoin(self.headurl, hrefs[-1])
        # ``(matches or [""])[-1]`` is the last match, or "" when there is none.
        productVersion = (tr.xpath("./td[3]/text()").extract() or [""])[-1]
        productModel = (tr.xpath("./td[1]/text()").extract() or [""])[-1]
        desc = (tr.xpath("./td[2]/a/text()").extract() or [""])[-1]
        # Alternative (method one): pass the same values through meta instead.
        # request = scrapy.FormRequest(url, callback=self.parse_page,
        #                              meta={'productModel': productModel, 'productVersion': productVersion, 'desc': desc})
        # yield request
        #
        # The values are bound as lambda *defaults* (pm=..., pv=..., dc=...) so
        # every request keeps its own row's values; names looked up when the
        # callback finally runs would all refer to the last row of the loop.
        # dont_filter=True lets the request bypass Scrapy's duplicate filter.
        # NOTE(review): newer Scrapy releases offer Request.cb_kwargs for
        # passing callback arguments -- consider it if the installed version has it.
        request = scrapy.FormRequest(url, callback=lambda response, pm=productModel, pv=productVersion, dc=desc: self.parse_page(response, pm, pv, dc), dont_filter=True)
        yield request
def parse_page(self, response, pm, pv, dc):
    """Print the three extra values forwarded by the lambda callback.

    ``pm``, ``pv`` and ``dc`` are the values the requesting method bound as
    lambda defaults (product model, product version and link text there).
    """
    # A single pre-formatted argument prints the same space-separated line
    # under both the Python 2 print statement and the Python 3 function.
    print("%s %s %s" % (pm, pv, dc))
使用lambda进行参数传递时,注意参数是以lambda默认参数的形式绑定的,例如:pm = productModel
使用meta的时候,不需要在回调函数的参数列表中添加额外的参数;而使用lambda时,回调函数必须声明这些额外的参数(如下面的pm、pv、dc)。
# The lambda binds the current values as default arguments (pm, pv, dc) and
# forwards them to parse_page together with the response.
request = scrapy.FormRequest(url, callback=lambda response, pm = productModel,pv= productVersion,dc = desc : self.parse_page(response, pm, pv, dc), dont_filter=True)
yield request
def parse_page(self, response, pm, pv, dc):
    """Print the three extra values forwarded by the lambda callback.

    Unlike the ``meta`` approach, the callback must declare the extra
    parameters (``pm``, ``pv``, ``dc``) in its own signature.
    """
    # A single pre-formatted argument prints the same space-separated line
    # under both the Python 2 print statement and the Python 3 function.
    print("%s %s %s" % (pm, pv, dc))
参考链接:https://blog.youkuaiyun.com/benben0729/article/details/80848661
https://blog.youkuaiyun.com/showhilllee/article/details/72871141
https://blog.youkuaiyun.com/rgc_520_zyl/article/details/78946974