常用参数含义:
BOT_NAME = 'demo1'
SPIDER_MODULES = ['demo1.spiders']
NEWSPIDER_MODULE = 'demo1.spiders'
USER_AGENT = 'demo1 (+http://www.yourdomain.com)'
ROBOTSTXT_OBEY = True
CONCURRENT_REQUESTS = 32
DOWNLOAD_DELAY = 3
CONCURRENT_REQUESTS_PER_DOMAIN = 16
CONCURRENT_REQUESTS_PER_IP = 16
COOKIES_ENABLED = False
TELNETCONSOLE_ENABLED = False
DEFAULT_REQUEST_HEADERS = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en',
}
SPIDER_MIDDLEWARES = {
'demo1.middlewares.Demo1SpiderMiddleware': 543,
}
DOWNLOADER_MIDDLEWARES = {
'demo1.middlewares.MyCustomDownloaderMiddleware': 543,
}
EXTENSIONS = {
'scrapy.extensions.telnet.TelnetConsole': None,
}
ITEM_PIPELINES = {
'demo1.pipelines.Demo1Pipeline': 300,
}
AUTOTHROTTLE_ENABLED = True
AUTOTHROTTLE_START_DELAY = 5
AUTOTHROTTLE_MAX_DELAY = 60
AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
AUTOTHROTTLE_DEBUG = False
HTTPCACHE_ENABLED = True
HTTPCACHE_EXPIRATION_SECS = 0
HTTPCACHE_DIR = 'httpcache'
HTTPCACHE_IGNORE_HTTP_CODES = []
HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
LOG_LEVEL = 'WARNING'
"""
Scrapy提供5层logging级别:
CRITICAL - 严重错误(critical)
ERROR - 一般错误(regular errors)
WARNING - 警告信息(warning messages)
INFO - 一般信息(informational messages)
DEBUG - 调试信息(debugging messages)
scrapy默认显示DEBUG级别的log信息
"""
LOG_FILE = './log.log'
