import sys
import time
import urllib
import urllib2
# 1. Showing download progress with urlretrieve
def transfer_progress(blocks_transfered, block_size, file_size):
    """Write a one-line progress report; meant as a ``urlretrieve`` report hook.

    Args:
        blocks_transfered: Number of blocks received so far.
        block_size: Size of one block in bytes.
        file_size: Total size in bytes. Zero or a negative value is treated
            as "size unknown" and the percentage is reported as 0.

    The speed is measured from the module-level ``starttime`` when the caller
    has set one. Otherwise the clock starts at the first call, and restarts
    whenever ``blocks_transfered`` is 0 (the start of a new transfer).
    """
    now = time.time()
    if blocks_transfered == 0 or not hasattr(transfer_progress, "_fallback_start"):
        transfer_progress._fallback_start = now
    started = globals().get("starttime", transfer_progress._fallback_start)

    transferred = blocks_transfered * block_size
    if file_size > 0:
        # The last block is usually partial, so blocks * block_size can
        # overshoot the real size; cap it to keep the report at <= 100%.
        transferred = min(transferred, file_size)
        percent = transferred * 100.0 / file_size
    else:
        percent = 0.0

    # Float arithmetic throughout: integer division would truncate the values.
    progress = transferred / 1024.0
    elapsed = now - started
    downspeed = (transferred / elapsed) / 1024.0 if elapsed > 0 else 0.0

    # "\r" returns to the start of the line so each report overwrites the last.
    sys.stdout.write("Complete: %.0f%% - Downloaded: %.2fKb - Speed: %.3fkb/s\r" % (percent, progress, downspeed))
    sys.stdout.flush()
def main():
    """Download a file with ``urllib.urlretrieve``, reporting progress.

    Prints a usage message and returns without downloading when no
    command-line argument was given.
    """
    global starttime

    if len(sys.argv) < 2:
        print("Need cnet.com link as argument, use cnet.com link")
        # Without an argument there is nothing to download; stop here
        # instead of falling through to the retrieval.
        return

    # NOTE(review): finnalUrl and fileName are not defined anywhere in the
    # visible code -- presumably they are derived from sys.argv[1] by a step
    # that is missing from this snippet. TODO confirm and restore that step.
    # transfer_progress measures speed from the module-level start time.
    starttime = time.time()
    urllib.urlretrieve(finnalUrl, fileName, transfer_progress)
# 2. URL decoding (example: decoding and printing percent-encoded Russian text)
# Percent-encoded bytes of a Russian phrase in the windows-1251 code page.
string = '%CF%EB%EE%F5%EE%E9%20%EB%E5%E9%F2%E5%ED%E0%ED%F2'
# unquote() replaces every %XX escape with the corresponding raw byte.
string = urllib.unquote(string)
# Interpret the raw bytes as windows-1251 to obtain the readable text.
conver = string.decode('windows-1251')
print(conver)
# 3. Downloading with urllib2 while sending a Cookie header:
# Request headers sent with the download; the Cookie entry is passed
# explicitly as a header rather than through a cookie jar.
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:6.0.2) Gecko/20100101 Firefox/6.0.2',
    'Referer': 'http://www.youtube.com/watch?v=_OFMkCeP6ok&feature=topvideos_music',
    'Accept': 'video/webm,video/ogg,video/*;q=0.9,application/ogg;q=0.7,audio/*;q=0.6,*/*;q=0.5',
    'Range': 'bytes=0-',
    'Cookie': 'VISITOR_INFO1_LIVE=tzu_phswJEQ',
}

# NOTE(review): `url` is not defined anywhere in the visible snippet; it must
# be set to the media URL before this runs. TODO confirm where it comes from.
req = urllib2.Request(url, None, headers)
# There is no `self` at module level, so use the default urllib2 opener.
response = urllib2.urlopen(req)
try:
    print(response.headers)
    data = response.read()
finally:
    response.close()

# Open the output only after the download succeeded, so a failed request
# does not leave a truncated file behind; `with` guarantees it is closed.
with open('./abcdefg', 'wb') as f:
    f.write(data)
# ======================================================================
# ===================== YouTube download example =======================
# ======================================================================
import re
import urllib
import urllib2
import cookielib
class Youtube(object):
    """Minimal urllib2 client for YouTube's HTML5 page and video download.

    Requests go through an opener with an attached cookie jar. Cookies and
    the XSRF token picked up along the way are kept on the instance in
    ``cookies`` (a name -> value dict) and ``token``.
    """

    # Browser identification sent with the HTML5 page requests.
    _USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:6.0.2) Gecko/20100101 Firefox/6.0.2'

    def __init__(self):
        self.html5URL = 'http://www.youtube.com/html5'
        self.cookies = {}
        self.token = ''
        self.cookiejar = cookielib.CookieJar()
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookiejar))

    def GetCookies(self, string):
        """Store the cookies found in the ``Set-Cookie`` lines of a header block.

        Args:
            string: Raw response headers as one string, lines separated by
                CRLF. Lines that are not ``Set-Cookie`` headers are ignored.
        """
        for line in string.split('\r\n'):
            name, sep, value = line.partition(': ')
            # Header names are compared case-insensitively.
            if not sep or name.lower() != 'set-cookie':
                continue
            # Only the leading "name=value" pair is the cookie itself; what
            # follows the first ';' are attributes such as Path or Expires.
            pair = value.split(';', 1)[0].strip()
            key, eq, val = pair.partition('=')
            if key and eq:
                self.cookies[key] = val
        print('cookies are :  %s' % (self.cookies,))

    def GetCookiesString(self):
        """Return the stored cookies as ``name=value`` pairs joined by ``'; '``.

        Returns an empty string when no cookies are stored.
        """
        return '; '.join('%s=%s' % (name, value) for name, value in self.cookies.items())

    def LoginYoutubeHtml5(self):
        """Fetch the HTML5 page and remember its XSRF token and cookies."""
        headers = {'User-Agent': self._USER_AGENT}
        req = urllib2.Request(self.html5URL, None, headers)
        print('LoginYoutubeHtml5 ===req=== is %s' % (req,))
        response = self.opener.open(req)
        try:
            html_code = response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()

        token = re.findall(r'\'XSRF_TOKEN\':\s*?\'(\w+)\'', html_code)
        if token:
            self.token = token[0]
        # Mirror the jar into the plain dict used by GetCookiesString().
        for item in self.cookiejar:
            self.cookies[item.name] = item.value
        print('LoginYoutubeHtml5 ===self.cookies=== is %s' % (self.cookies,))
        print('LoginYoutubeHtml5 ===self.token=== is:  %s' % (self.token,))

    def Post2UseHtml5Video(self):
        """POST the enable-HTML5 form to the HTML5 page using the stored token."""
        headers = {'User-Agent': self._USER_AGENT, 'Referer': self.html5URL}
        values = {'enable_html5': 'true', 'session_token': self.token}
        data = urllib.urlencode(values)
        req = urllib2.Request(self.html5URL, data, headers)
        response = self.opener.open(req)
        try:
            print('Post2UseHtml5Video ===self.cookies=== is %s' % (self.cookiejar,))
            print('Post2UseHtml5Video ===response.headers=== is %s' % (response.headers,))
        finally:
            response.close()

    def GetVideo(self, url, referer, path='./abcdefg'):
        """Download ``url`` into a local file.

        Args:
            url: Address of the media to download.
            referer: Sent as the ``Referer`` header when non-empty.
            path: Destination file; defaults to the original ``./abcdefg``.
        """
        # NOTE(review): the visitor cookie is hard-coded, and an explicit
        # Cookie header takes precedence over the opener's cookie jar.
        headers = {
            'Range': 'bytes=0-',
            'Cookie': 'VISITOR_INFO1_LIVE=up3mRz6yRP4',
        }
        if referer:
            headers['Referer'] = referer

        req = urllib2.Request(url, None, headers)
        response = self.opener.open(req)
        try:
            print('\nGetVideo ===response.headers=== is:  %s' % (response.headers,))
            # Copy in fixed-size chunks so a large video is never held in
            # memory as a whole; `with` closes the file on every path.
            with open(path, 'wb') as out:
                while True:
                    chunk = response.read(64 * 1024)
                    if not chunk:
                        break
                    out.write(chunk)
        finally:
            response.close()
if __name__ == '__main__':
    # Ask for the download address first, then for the referer address.
    video_url = raw_input('Please entry youtube html5 download url:')
    referer_url = raw_input('Please entry youtube html5 referer url:')

    downloader = Youtube()
    # The HTML5 opt-in steps are currently disabled:
    #   downloader.LoginYoutubeHtml5()
    #   downloader.Post2UseHtml5Video()
    downloader.GetVideo(video_url, referer_url)