#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2006 UbuntuChina <http://www.ubuntu.org.cn>
# License: GPLv2
# Author: oneleaf <oneleaf AT gmail.com>
"""Download the songs named on a Baidu MP3 list page.

Run as a script, this reads ``/list/newhits.html?id=1`` from
``list.mp3.baidu.com``, looks every listed entry up on ``mp3.baidu.com`` and
saves the first candidate file that downloads successfully into the current
directory as ``<author>-<name><ext>``.

The module is written to run on both Python 2 (2.6+) and Python 3, which is
why it avoids the multi-argument ``print`` statement and wraps the imports
that were renamed between the two.
"""

try:
    import httplib
except ImportError:  # Python 3 renamed httplib to http.client
    import http.client as httplib
import re
import urllib
import os
import locale

try:
    from urllib.request import urlopen as _urlopen
except ImportError:  # Python 2 has no urllib.request; see _open_url()
    _urlopen = None

# Both pages are decoded as GBK before they are scraped.
_PAGE_ENCODING = 'gbk'

# Patterns used on the search-result page fetched by getdownurl().
_RESULT_ANCHOR_RE = re.compile(r'http://220.181.27.54/m(.*)</a>')
_TITLE_ATTR_RE = re.compile(r'title=(.*)onclick')
_HTTP_URL_RE = re.compile(r'http(\S*)')

# Patterns used on the list page fetched by main().
_CHART_ANCHOR_RE = re.compile(r'<a href="http://mp3.baidu.com/m(.*)</a>')
_CHART_LINK_RE = re.compile(r'(.*)target')
_ANCHOR_TEXT_RE = re.compile(r'blank>(.*)')

# Failures that mean "this attempt did not work, try the next one".
# ValueError covers unknown URL types and the UnicodeError raised for URLs
# that cannot be encoded.
_DOWNLOAD_ERRORS = (IOError, OSError, ValueError, httplib.HTTPException)


def _say(*parts):
    """Print the text *parts* on one line, separated by single spaces."""
    # A single parenthesised argument prints identically on Python 2 and 3.
    print(u" ".join(parts))


def _fetch_page(host, path):
    """Return the body of ``http://<host><path>`` as text decoded from GBK.

    Undecodable bytes are replaced rather than raising, so one bad byte does
    not make the whole page unusable.  Connection errors propagate.
    """
    conn = httplib.HTTPConnection(host)
    try:
        conn.request("GET", path)
        body = conn.getresponse().read()
    finally:
        # Close the connection even when the request or the read fails.
        conn.close()
    return body.decode(_PAGE_ENCODING, 'replace')


def _open_url(url):
    """Open *url* for reading and return the file-like response object."""
    if _urlopen is not None:
        return _urlopen(url)
    # Python 2: keep the opener the script has always used.
    return urllib.URLopener().open(url)


def _to_text(entry):
    """Return the directory entry *entry* as text.

    Byte strings are decoded with the locale's preferred encoding; undecodable
    bytes are replaced so an oddly named file cannot abort the run.
    """
    if isinstance(entry, bytes):
        return entry.decode(locale.getpreferredencoding(), 'replace')
    return entry


def _safe_filename(text):
    """Return *text* with path separators and NUL replaced by ``_``.

    The author and title come from a remote page, so they must not be able to
    point the output file outside the current directory.
    """
    for bad in (os.sep, os.altsep, '\0'):
        if bad:  # os.altsep is None on platforms with a single separator
            text = text.replace(bad, '_')
    return text


def _extract_mp3_urls(html):
    """Return the candidate download URLs found in a search-result page."""
    urls = []
    # Only every second match is inspected, as the original loop stepped by 2.
    for fragment in _RESULT_ANCHOR_RE.findall(html)[::2]:
        title = _TITLE_ATTR_RE.search(fragment)
        if not title:
            continue
        link = _HTTP_URL_RE.search(title.group(0))
        if link:
            urls.append(link.group(0))
    return urls


def _parse_chart(html):
    """Return ``(url, author, name)`` tuples scraped from the list page.

    Matches are consumed in pairs: the first anchor of a pair supplies the
    lookup URL and the name, the second supplies the author.  An unpaired
    trailing anchor, or a pair that does not have the expected shape, is
    skipped instead of raising.
    """
    anchors = _CHART_ANCHOR_RE.findall(html)
    entries = []
    for idx in range(0, len(anchors) - 1, 2):
        song_anchor = anchors[idx]
        link = _CHART_LINK_RE.search(song_anchor)
        title = _ANCHOR_TEXT_RE.search(song_anchor)
        artist = _ANCHOR_TEXT_RE.search(anchors[idx + 1])
        if not (link and title and artist):
            continue
        # Drop the two characters in front of "target" (the closing quote and
        # the space) and restore the "/m" consumed by the anchor pattern.
        url = '/m' + link.group(1)[:-2]
        entries.append((url, artist.group(1), title.group(1)))
    return entries


def getdownurl(url):
    """Return the candidate MP3 URLs listed on a search-result page.

    :param url: request path (with query string) on ``mp3.baidu.com``.
    :returns: list of URL strings, possibly empty.
    :raises: connection errors from ``httplib`` propagate to the caller.
    """
    return _extract_mp3_urls(_fetch_page('mp3.baidu.com', url))


def downmp3(url, author, name, filelist):
    """Download one song unless a matching file is already present.

    :param url: request path on ``mp3.baidu.com`` that lists candidate files.
    :param author: author text, used as the first part of the file name.
    :param name: title text, used as the second part of the file name.
    :param filelist: names of the files already in the target directory.
    :returns: 1 when the song was already present or was downloaded,
        0 when no candidate URL could be downloaded.
    :raises: connection errors from :func:`getdownurl` propagate.
    """
    basename = _safe_filename(author + "-" + name)
    for entry in filelist:
        if _to_text(entry).startswith(basename):
            print(u"文件已经下载,忽略。")
            return 1

    for link in getdownurl(url):
        _say(u"正在连接", link)
        # Build the target name afresh for every candidate so that a failed
        # attempt cannot leave its extension appended for the next one.
        target = _safe_filename(basename + link[-4:])
        try:
            response = _open_url(link)
            try:
                data = response.read()
            finally:
                response.close()
            with open(target, 'wb') as out:
                out.write(data)
        except _DOWNLOAD_ERRORS:
            # Best effort: fall through to the next candidate URL.
            continue
        print(u"下载成功!")
        return 1
    return 0


def main():
    """Download every song on the list page into the current directory."""
    html = _fetch_page('list.mp3.baidu.com', '/list/newhits.html?id=1')
    for url, author, name in _parse_chart(html):
        _say(u"开始下载", author, name)
        filelist = os.listdir('.')
        try:
            status = downmp3(url, author, name, filelist)
        except _DOWNLOAD_ERRORS:
            # A failed lookup for one song must not abort the whole run.
            status = 0
        if status == 0:
            _say(u"下载", author, name, u'失败!')


if __name__ == "__main__":
    main()