1.robots.txt协议
# Scrapy setting: False means the crawler does not honor robots.txt rules.
ROBOTSTXT_OBEY = False
需要激活并修改为False
# Scrapy setting: delay (in seconds) between requests; slows the crawl down.
DOWNLOAD_DELAY = 0.5
DOWNLOAD_DELAY 需要激活(取消注释),并设置延迟时间(单位:秒),以降低爬取速度
# Scrapy setting: False disables cookie handling (no cookie tracking).
COOKIES_ENABLED = False
禁用cookie追踪
#SPIDER_MIDDLEWARES = {
# 'IvskySpider.middlewares.IvskyspiderSpiderMiddleware': 543,
#}
用于设置哪些 spider middlewares 生效
# Enable or disable downloader middlewares
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
DOWNLOADER_MIDDLEWARES = {
    # Each value is an ordering priority: the smaller the number, the earlier
    # the middleware runs. A value of None means the middleware is not run.
    "IvskySpider.middlewares.UserAgentMiddleware": 543,
    # Built-in Scrapy User-Agent middleware, switched off via None
    # (presumably so the project middleware above takes its place).
    "scrapy.downloadermiddlewares.useragent.UserAgentMiddleware": None,
}
# 哪些 pipeline 可以使用
#ITEM_PIPELINES = {
# 'IvskySpider.pipelines.IvskyspiderPipeline': 300,
#}