# -*- coding: utf-8 -*-
"""Selenium demo: wait for page load and for asynchronously loaded content.

filename : net_csdn_bbs_topics392220630.py
author: hu@daonao.com QQ: 443089607 weixin: huzhenghui weibo: http://weibo.com/443089607
category : selenium
original url : http://bbs.youkuaiyun.com/topics/392220630
original title : python crawler fails at runtime, asking for help
title : a selenium crawler must check page-load and content-load progress

Published on the blog so that the indentation Python strictly requires is
shown clearly. See the comments below for the detailed explanation.

The script opens Firefox, loads Baidu, types a query into the element named
``wd``, submits it, then polls until the document is ready and until elements
with class ``result`` appear.  Both polls are bounded by
``WAIT_TIMEOUT_SECONDS`` and the browser is always closed on exit.
"""
import datetime
import logging
import time

import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By

# Upper bound for each polling phase, so a missing page or an empty result
# set cannot hang the script forever.
WAIT_TIMEOUT_SECONDS = 30
# Pause between polls; every poll is a round trip to the browser driver.
POLL_INTERVAL_SECONDS = 0.1

logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(levelname)s - %(message)s')

START_DATETIME = datetime.datetime.now()
logging.debug('start at %s', START_DATETIME)

STR_READY_STATE = ''
LIST_RESULT = []

WEB_DRIVER = webdriver.Firefox()
try:
    logging.debug('type(WEB_DRIVER) : %s', type(WEB_DRIVER))

    # get() returns only after the page has finished loading.
    WEB_DRIVER.get('http://www.baidu.com/')
    logging.debug('request completed')

    # find_elements(By..., value) is used instead of the find_elements_by_*
    # helpers, which newer Selenium releases no longer provide.
    LIST_WEBELEMENT = WEB_DRIVER.find_elements(By.NAME, 'wd')
    if not LIST_WEBELEMENT:
        # Without a search box no query is submitted, so there is nothing
        # to wait for.
        logging.debug('there are no web element which name is wd')
    else:
        WEBELEMENT = LIST_WEBELEMENT[0]
        WEBELEMENT.send_keys('huzhenghui')
        WEBELEMENT.send_keys(selenium.webdriver.common.keys.Keys.RETURN)

        # When the page is driven directly (not through get()), the script
        # has to wait for the page load to complete by itself.
        DEADLINE = time.time() + WAIT_TIMEOUT_SECONDS
        while STR_READY_STATE != 'complete' and time.time() < DEADLINE:
            time.sleep(POLL_INTERVAL_SECONDS)
            STR_READY_STATE = WEB_DRIVER.execute_script(
                'return document.readyState')
            logging.debug('STR_READY_STATE : %s', STR_READY_STATE)
        if STR_READY_STATE != 'complete':
            logging.warning('page not ready after %s seconds',
                            WAIT_TIMEOUT_SECONDS)

        # For pages that load content asynchronously via AJAX, the script
        # also has to detect when that content has arrived.  The timeout
        # covers the case where loading succeeds but yields no results.
        DEADLINE = time.time() + WAIT_TIMEOUT_SECONDS
        while not LIST_RESULT and time.time() < DEADLINE:
            time.sleep(POLL_INTERVAL_SECONDS)
            LIST_RESULT = WEB_DRIVER.find_elements(By.CLASS_NAME, 'result')
            logging.debug('len(LIST_RESULT) : %d', len(LIST_RESULT))
        if not LIST_RESULT:
            logging.warning('no result elements after %s seconds',
                            WAIT_TIMEOUT_SECONDS)

        for WEBELEMENT_RESULT in LIST_RESULT:
            logging.debug('Result : %s', str(WEBELEMENT_RESULT))
finally:
    # Always shut the browser and its driver process down, even on errors.
    WEB_DRIVER.quit()
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
再分享一下我老师大神的人工智能教程吧。零基础!通俗易懂!风趣幽默!还带黄段子!希望你也加入到我们人工智能的队伍中来!https://blog.youkuaiyun.com/jiangjunshow