# Runtime environment: Linux
# Requires: Python 2.7
import re
import sys
import urllib
def getHtml(url):
    """Fetch ``url`` and return the raw response body.

    Args:
        url: Address passed to ``urllib.urlopen`` (Python 2 API).

    Returns:
        Whatever the response object's ``read()`` returns for the full body.
    """
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Always release the underlying connection, even if read() raises.
        page.close()
def down_file(url):
    """Download ``url`` into a file in the current working directory.

    The local file name is the last ``/``-separated component of ``url``;
    it is printed before the transfer starts. The body is copied in
    fixed-size chunks so the whole download is never held in memory.

    The transfer does not rely on a ``Content-Length`` header, so responses
    that omit it are downloaded as well.

    Args:
        url: Address of the resource to download.
    """
    file_name = url.split('/')[-1]
    # Parenthesised form prints the same text under Python 2's print statement.
    print(file_name)

    block_sz = 8192
    response = urllib.urlopen(url)
    try:
        # 'with' guarantees the output file is flushed and closed on any exit.
        with open(file_name, 'wb') as out:
            while True:
                chunk = response.read(block_sz)
                if not chunk:
                    # An empty read signals end of the response body.
                    break
                out.write(chunk)
    finally:
        response.close()
def getFile(
        html,
        base_url="http://mirrors.ustc.edu.cn/gnome/desktop/2.91/2.91.2/sources/"):
    """Download every ``.tar.bz2`` archive linked from an index page.

    Anchors whose visible text ends in ``.tar.bz2`` are located in ``html``.
    For each one the download URL is built as ``base_url + name + ".tar.bz2"``,
    printed, and handed to ``down_file``.

    Args:
        html: Markup of the index page to scan.
        base_url: Prefix prepended to every archive name; defaults to the
            mirror directory this script was written for.

    Returns:
        List of matched archive names with the ``.tar.bz2`` suffix removed,
        in document order. Empty when nothing matches.
    """
    suffix = '.tar.bz2'
    # Dots are escaped so only a literal ".tar.bz2" matches, and every
    # wildcard is bounded by the tag/attribute delimiters so several anchors
    # on the same line are matched one by one instead of being merged.
    link_pattern = re.compile(
        r'<a\s[^>]*?href="[^"]+"[^>]*>([^<]*?)\.tar\.bz2</a>')
    url_list = link_pattern.findall(html)
    for name in url_list:
        url = base_url + name + suffix
        print(url)
        down_file(url)
    return url_list
# Script entry: fetch the mirror's directory index, let getFile() process the
# archives it links to, and print the list of names getFile() returns.
html = getHtml(
    "http://mirrors.ustc.edu.cn/gnome/desktop/2.91/2.91.2/sources/"
)
print(getFile(html))