crawl.py

>crawl.py http://www.hao123.com/index.htm

The output is as follows:

parsedurl =  ParseResult(scheme='http', netloc='www.hao123.com', path='/index.htm', params='', query='', fragment='')
path =  www.hao123.com/index.htm
ext =  ('www.hao123.com/index', '.htm')
path =  www.hao123.com/index.htm
ldir =  www.hao123.com
ldir =  www.hao123.com
path =  www.hao123.com/index.htm
self.url =  http://www.hao123.com/index.htm
self.file =  www.hao123.com/index.htm
retval =  ('www.hao123.com/index.htm', <httplib.HTTPMessage instance at 0x010F9968>)
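
This trace is the filename-derivation step: the URL is parsed, the netloc and path are joined into a relative file path, the extension is checked (a default index.htm would be appended if it were missing), the local directory is created, and the page is downloaded to that path. A minimal Python 3 sketch of that logic, with variable names matching the trace (the original run is Python 2, hence the httplib.HTTPMessage in retval):

```python
import os
from urllib.parse import urlparse
from urllib.request import urlretrieve

def url_to_local_file(url, deffile='index.htm'):
    parsedurl = urlparse(url)                 # ParseResult(scheme='http', netloc='www.hao123.com', ...)
    path = parsedurl.netloc + parsedurl.path  # 'www.hao123.com/index.htm'
    ext = os.path.splitext(path)              # ('www.hao123.com/index', '.htm')
    if ext[1] == '':                          # no extension: fall back to the default file
        path += deffile if path.endswith('/') else '/' + deffile
    ldir = os.path.dirname(path)              # local archive directory: 'www.hao123.com'
    if os.sep != '/':                         # OS-independent separator (e.g. Windows)
        ldir = ldir.replace('/', os.sep)
    if ldir and not os.path.isdir(ldir):      # create the archive directory if needed
        os.makedirs(ldir, exist_ok=True)
    return path

url = 'http://www.hao123.com/index.htm'
local_file = url_to_local_file(url)           # 'www.hao123.com/index.htm'
retval = urlretrieve(url, local_file)         # (filename, http.client.HTTPMessage)
```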

( 1 )
URL: http://www.hao123.com/index.htm
FILE: www.hao123.com/index.htm
http://www.hao123.com                                         ... new, added to Q
http://www.hao123.com/redian/tongzhi.htm                      ... new, added to Q
http://utility.hao123.com/quality_form.php                    ... discarded, not in domain
*  javascript:void(0)                                            ... discarded, javascript
http://www.hao123.com/redian/scookie.htm                      ... new, added to Q
*  javascript:void(0)                                            ... discarded, javascript
*  javascript:void(0)                                            ... discarded, javascript
*  javascript:void(0)                                            ... discarded, javascript
http://www.hao123.com                                         ... discarded, already in Q
http://wenku.baidu.com                                        ... discarded, not in domain
http://baike.baidu.com                                        ... discarded, not in domain
http://jingyan.baidu.com                                      ... discarded, not in domain
http://hi.baidu.com                                           ... discarded, not in domain
http://top.baidu.com                                          ... discarded, not in domain
http://dict.baidu.com                                         ... discarded, not in domain
http://s.baidu.com                                            ... discarded, not in domain
http://www.baidu.com                                          ... discarded, not in domain
http://www.hao123.com/daquan/shfwsite.htm                     ... new, added to Q
http://www.hao123.com/netbuy.htm                              ... new, added to Q
http://www.hao123.com/caipiao.htm                             ... new, added to Q
http://www.hao123.com/haoserver/index.htm                     ... new, added to Q
http://www.hao123.com/tianqi.htm                              ... new, added to Q
http://www.hao123.com/stock.htm                               ... new, added to Q
http://www.hao123.com/stock3.htm                              ... new, added to Q
http://www.hao123.com/bankjt.htm                              ... new, added to Q
http://www.hao123.com/lvyou.htm                               ... new, added to Q

..........
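
The per-link decisions in the log follow one rule: javascript: pseudo-links are discarded, links whose host differs from the start domain are discarded, links already queued are discarded, and everything else is appended to the crawl queue Q. A hedged sketch of that classification (identifiers here are illustrative, not the original crawl.py names):

```python
from urllib.parse import urlparse

def classify(link, dom, queued):
    """Reproduce the per-link verdicts printed in the log above."""
    if link.startswith('javascript:'):
        return 'discarded, javascript'
    if urlparse(link).netloc != dom:     # utility.hao123.com != www.hao123.com
        return 'discarded, not in domain'
    if link in queued:
        return 'discarded, already in Q'
    queued.add(link)
    return 'new, added to Q'

queued = set()
for link in ('http://www.hao123.com',
             'javascript:void(0)',
             'http://wenku.baidu.com',
             'http://www.hao123.com'):
    print('%-45s ... %s' % (link, classify(link, 'www.hao123.com', queued)))
```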
