# coding=utf-8
'''
Created on 2017-08-16
@author: Lihhz
'''
from spider.url_manager import UrlManager
from spider.html_downloader import HtmlDownloader
from spider.html_parser import HtmlParser
import logging
import thread
# Send every record at DEBUG level and above to myapp.log; filemode 'w'
# truncates the log file each time the script starts.
logging.basicConfig(
    filename='myapp.log',
    filemode='w',
    level=logging.DEBUG,
    datefmt='%a, %d %b %Y %H:%M:%S',
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
)

# Module-level collaborators shared by every Main instance.
urlManager = UrlManager()
htmlDownloader = HtmlDownloader('d:/test1')
htmlParser = HtmlParser()
# Search URL template; its single %s placeholder is filled with the `pn`
# query value by Main.craw (presumably the result offset -- confirm).
url = 'http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=周杰伦&pn=%s&gsm=&ct=&ic=0&lm=-1&width=0&height=0'


class Main(object):
    """Download the pages generated from the module-level ``url`` template."""

    def __init__(self, rootUrl):
        """Remember ``rootUrl``.

        NOTE(review): ``craw`` builds its pages from the module-level ``url``
        template and never reads ``self.rootUrl``.
        """
        self.rootUrl = rootUrl

    def download(self, u):
        """Process one page URL ``u``.

        The page is fetched with ``htmlDownloader.downloadHtml``, handed to
        ``htmlParser.parseHtml`` and the parser's result is passed on to
        ``htmlDownloader.downloadImage``.
        """
        htmlContent = htmlDownloader.downloadHtml(u)
        imageUrls = htmlParser.parseHtml('', htmlContent)
        htmlDownloader.downloadImage(imageUrls)

    def craw(self):
        """Run ``download`` for 100 pages concurrently and wait for all of them.

        Each page URL is ``url % (i * 20)`` for ``i`` in ``0..99``. One thread
        is started per page, and this method returns only after every thread
        has finished.
        """
        # Local import keeps this block self-contained; the file-level imports
        # only provide the low-level `thread` module.
        import threading

        workers = []
        for i in range(0, 100):
            u = url % (i * 20)
            logging.info('======%s', u)
            worker = threading.Thread(target=self.download, args=(u,))
            worker.start()
            workers.append(worker)

        # Without this wait the main thread falls off the end of the script
        # while downloads are still running; with thread.start_new_thread that
        # tears the interpreter down underneath them ("Unhandled exception in
        # thread started by ... lost sys.stderr").
        for worker in workers:
            worker.join()
if __name__ == '__main__':
    # Script entry point: build a Main with its root URL and start crawling.
    start_page = 'http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=周杰伦&pn=0&gsm=50&ct=&ic=0&lm=-1&width=0&height=0'
    Main(start_page).craw()
错误信息
pydev debugger: starting (pid: 10112)
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
File "_pydevd_bundle\pydevd_cython_win32_27_32.pyx", line 918, in _pydevd_bundle.pydevd_cython_win32_27_32.ThreadTracer.__call__ (_pydevd_bundle/pydevd_cython_win32_27_32.c:15143)
Traceback (most recent call last):
File "_pydevd_bundle\pydevd_cython_win32_27_32.pyx", line 918, in _pydevd_bundle.pydevd_cython_win32_27_32.ThreadTracer.__call__ (_pydevd_bundle/pydevd_cython_win32_27_32.c:15143)
File "_pydevd_bundle\pydevd_cython_win32_27_32.pyx", line 918, in _pydevd_bundle.pydevd_cython_win32_27_32.ThreadTracer.__call__ (_pydevd_bundle/pydevd_cython_win32_27_32.c:15143)
File "_pydevd_bundle\pydevd_cython_win32_27_32.pyx", line 918, in _pydevd_bundle.pydevd_cython_win32_27_32.ThreadTracer.__call__ (_pydevd_bundle/pydevd_cython_win32_27_32.c:15143)
Unhandled exception in thread started by
sys.excepthook is missing
lost sys.stderr
Unhandled exception in thread started by
sys.excepthook is missing
lost sys.stderr
Unhandled exception in thread started by
sys.excepthook is missing
lost sys.stderr