# -*- coding: utf-8 -*-
import re
import urllib
import urllib2
class Spider:
    """Minimal image scraper for a single web page.

    Fetches the page's HTML, extracts every ``.jpg`` URL that appears in
    markup shaped like ``src="<url>.jpg" pic_ext``, and saves each image
    under a relative file name ``0.jpg``, ``1.jpg``, ... in match order.
    """

    # Compiled once at class creation instead of on every getImages() call.
    # Group 1 captures the image URL (non-greedy, must end in ".jpg").
    _IMAGE_RGX = re.compile(r'src="(.+?\.jpg)" pic_ext')

    def downLoad(self, url):
        """Download every matching image referenced by the page at ``url``.

        :param url: address of the page to scan for images.
        """
        html = self.getHtml(url)
        self.getImages(html)

    def getHtml(self, url):
        """Fetch the page at ``url`` and return its body.

        :param url: address of the page to fetch.
        :returns: whatever ``read()`` on the opened response yields.
        """
        page = urllib.urlopen(url)
        try:
            return page.read()
        finally:
            # Release the underlying connection even if read() raises.
            page.close()

    def getImages(self, html):
        """Save every image whose URL is found in ``html``.

        Images are numbered from 0 in the order their URLs appear, and
        image *i* is written to ``"<i>.jpg"``. Nothing is downloaded when
        ``html`` contains no matching URL.

        :param html: page source to search for image URLs.
        """
        # Extract the image URLs from the HTML, then fetch them one by one.
        for index, imageUrl in enumerate(self._IMAGE_RGX.findall(html)):
            # Progress message ("downloading image number <index>").
            print ("下载第 %d 张" % index)
            urllib.urlretrieve(imageUrl, "%s.jpg" % index)
if __name__ == "__main__":
    # Script entry point: scrape the images from one hard-coded thread page.
    Spider().downLoad("http://tieba.baidu.com/p/2460150866")