

# -*- coding: utf-8 -*-
import urllib
import urllib2, cookielib
import httplib2
import re
from sgmllib import SGMLParser
import htmlentitydefs
from BeautifulSoup import BeautifulSoup
def main():
    '''Download every wanted image referenced by the target HTML page.

    Fetches the hard-coded page URL with httplib2, parses the response body
    with BeautifulSoup and hands each <img> whose ``src`` starts with
    ``http://file.`` to downloadImage(). Every other <img> is reported on
    stdout and skipped.
    '''
    http = httplib2.Http()
    url = 'http://www.targetpage.com/index.html'
    _, content = http.request(url, 'GET')
    soup = BeautifulSoup(content)
    for img in soup.findAll('img'):
        # img['src'] raises KeyError for an <img> without a src attribute,
        # which would abort the whole run; treat such tags as unwanted.
        src = img.get('src') or ''
        # Decide by host prefix: only images served from "file." are wanted.
        if src.startswith('http://file.'):
            downloadImage(src)
        else:
            print('not the one wanted! canceled')
# Folder downloaded images are saved into unless the caller overrides it.
_DEFAULT_SAVE_DIR = u'C:\\Users\\lenovo\\Desktop\\新建文件夹\\aoye\\'


def downloadImage(imageUrl, saveDir=_DEFAULT_SAVE_DIR):
    '''Download one image and save it under saveDir.

    The local file name is the last path segment of imageUrl, with any
    query string or fragment removed.

    Args:
        imageUrl: URL of the image to fetch.
        saveDir: Directory to save into; defaults to the folder this
            script originally hard-coded.

    Returns:
        True if the image was saved, False if anything went wrong. Errors
        are printed to stdout and never propagated to the caller.
    '''
    import os  # local import: the file's import block does not provide os

    try:
        # '?' and '#' parts are not part of the file name, and '?' is not a
        # legal character in Windows file names, so strip them first.
        urlPath = imageUrl.split('?', 1)[0].split('#', 1)[0]
        imageName = urlPath.split('/')[-1]
        imgFile = os.path.join(saveDir, imageName)
        urllib.urlretrieve(imageUrl, imgFile)
        print('Save image succeed! ' + imageUrl)
        return True
    except Exception as e:
        # Best-effort download: report the failure and let the caller go on
        # with the remaining images.
        print('%s Save image failed! %s' % (e, imageUrl))
        return False
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()