# -*- coding: utf-8 -*-
"""Download the ``.jpg`` images referenced by a web page, printing progress.

Run as a script, it fetches one Baidu Tieba thread, saves every matching image
to the current directory as ``0.jpg``, ``1.jpg``, ... and prints the list of
image URLs that were found.
"""
import re
import urllib

try:
    # Python 3 moved urlopen/urlretrieve into urllib.request.
    import urllib.request as _request
except ImportError:
    # Python 2: both functions live directly on the urllib module.
    import urllib2  # noqa: F401 -- imported by the original script, unused
    _request = urllib

# Non-greedy match of any run of characters (newline excluded) ending in
# ".jpg" inside src="...", followed by a pic_ext attribute.
_IMG_PATTERN = re.compile(r'src="(.+?\.jpg)" pic_ext')


def getHtml(url):
    """Return the raw body of *url* as read from ``urlopen``.

    The response is closed once it has been read, even if ``read()`` raises.
    """
    page = _request.urlopen(url)
    try:
        return page.read()
    finally:
        page.close()


def cbk(a, b, c):
    """Print download progress; used as the ``urlretrieve`` report hook.

    Args:
        a: Number of data blocks downloaded so far.
        b: Size of one data block.
        c: Total size of the download; zero or negative when it is unknown.
    """
    if c <= 0:
        # Without a usable total a percentage would be negative or divide by
        # zero, so report the amount transferred instead.
        print('%d bytes' % (a * b))
        return
    # a * b can overshoot c on the final block, so cap the value at 100.
    per = min(100.0 * a * b / c, 100)
    print('%.2f%%' % per)  # two decimal places


def getImg(html):
    """Download every image matched in *html* and return the matched URLs.

    Images are saved to the current directory as ``0.jpg``, ``1.jpg``, ... in
    match order, with progress reported through ``cbk``.

    Args:
        html: Page markup as text, or as the bytes returned by ``getHtml``.

    Returns:
        The list of image URLs found (empty when nothing matches).
    """
    if isinstance(html, bytes) and not isinstance(html, str):
        # Python 3 only: read() yields bytes, which a text pattern cannot
        # search. Undecodable bytes are replaced rather than raising.
        html = html.decode('utf-8', 'replace')
    imglist = _IMG_PATTERN.findall(html)
    for x, imgurl in enumerate(imglist):
        _request.urlretrieve(imgurl, '%s.jpg' % x, cbk)
    return imglist


if __name__ == "__main__":
    # Guarded so that importing this module does not start a download.
    html = getHtml("http://tieba.baidu.com/p/2460150866")
    print(getImg(html))
# Result: the script runs as expected and prints the download progress.