Python通过HTTP协议定期抓取文件(ZT)

最新推荐文章于 2024-06-17 16:55:09 发布

最新推荐文章于 2024-06-17 16:55:09 发布 · 207 阅读

文章标签：

#Python #FP #IBM

Python 专栏收录该内容

8 篇文章

订阅专栏


#!/usr/bin/python

import urllib2,time;
class ErrorHandler(urllib2.HTTPDefaultErrorHandler):
    def http_error_default(self, req, fp, code, msg, headers):
        result = urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)
        result.status = code
        return result

# Poll URL forever with conditional GETs: print the body whenever the server
# returns 200, and a banner whenever it answers 304 (not modified).
URL = 'http://www.ibm.com/developerworks/js/ajax1.js'

# Seconds to sleep between two polls.
POLL_INTERVAL = 10

# HTTP-date layout expected in If-Modified-Since (RFC 1123, always GMT).
# %a / %b are locale-dependent; they stay English as long as the process
# locale is left at the default "C" locale (this script never changes it).
HTTP_DATE_FORMAT = '%a, %d %b %Y %H:%M:%S GMT'

# Text printed when the server answers 304.
NOT_MODIFIED_BANNER = (
    '\n'
    '          ==============================\n'
    '              NOT MODIFIED\n'
    '          ==============================\n'
    '        '
)

req = urllib2.Request(URL)
# ErrorHandler returns non-success responses (304 included) as ordinary
# values, so the loop can branch on .code instead of catching HTTPError.
mgr = urllib2.build_opener(ErrorHandler())

# Validator sent back as If-Modified-Since; unknown until the first response.
modified = None

while True:
    try:
        ns = mgr.open(req)
    except urllib2.URLError as exc:
        # Network-level failure (DNS, refused connection, ...): report it and
        # try again on the next tick instead of killing the poller.
        print('there is an error: %s' % exc)
    else:
        try:
            # Remember the server's own timestamp whenever it provides one.
            last_modified = ns.headers.get('last-modified')
            if last_modified is not None:
                modified = last_modified

            if ns.code == 304:
                print(NOT_MODIFIED_BANNER)
            elif ns.code == 200:
                print(ns.read())
            else:
                print('there is an error')
        finally:
            # Release the connection; a new response is opened every poll.
            ns.close()

        if modified is None:
            # The server never sent Last-Modified: fall back to "now", but as
            # a proper HTTP-date. A raw time.time() float is not a valid
            # If-Modified-Since value and would simply be ignored.
            modified = time.strftime(HTTP_DATE_FORMAT, time.gmtime())
        req.add_header('If-Modified-Since', modified)

    time.sleep(POLL_INTERVAL)