python的urllib模块，学习的部分有用方法

最新推荐文章于 2025-05-15 22:00:00 发布

aaronchan1028

最新推荐文章于 2025-05-15 22:00:00 发布

阅读量836

点赞数

CC 4.0 BY-SA版权

分类专栏： Python 文章标签： python youtube token url string float

本文链接：https://blog.youkuaiyun.com/aaronchan1028/article/details/6729336

Python 专栏收录该内容

18 篇文章

订阅专栏

本文整理了 Python 2 中 urllib / urllib2 模块的几个实用方法：通过 urlretrieve 的回调函数显示下载进度；用 urllib.unquote 对 URL 编码的字符串解码（以 windows-1251 编码的俄文为例）；用 urllib2 携带 Cookie 等请求头下载文件；最后给出一个结合 cookielib 的 Youtube HTML5 视频下载示例。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

import urllib

import sys

#一、urlretrieve的进度显示

def transfer_progress(blocks_transfered, block_size, file_size):
    """Print a single-line download progress report.

    The signature is the three-argument report hook that ``main`` passes to
    ``urllib.urlretrieve``.

    Args:
        blocks_transfered: Number of blocks received so far.
        block_size: Size of one block in bytes.
        file_size: Total size in bytes; a value <= 0 is treated as
            "unknown" and the percentage is shown as ``??%``.

    The elapsed time is measured from a module-level ``starttime`` when the
    caller has defined one; otherwise the clock starts on the first call
    (and restarts whenever ``blocks_transfered`` is 0).
    """
    import time  # local import: the file header does not import ``time``

    now = time.time()
    started = globals().get('starttime')
    if started is None:
        # No caller-provided start time: keep our own on the function object.
        if blocks_transfered == 0 or not hasattr(transfer_progress, '_started'):
            transfer_progress._started = now
        started = transfer_progress._started

    transferred = blocks_transfered * block_size
    if file_size > 0:
        # The final block is usually only partly filled, so clamp to the total.
        transferred = min(transferred, file_size)
        percent_text = "%.0f%%" % (transferred * 100.0 / file_size)
    else:
        percent_text = "??%"

    progress = transferred / 1024.0
    elapsed = now - started
    # Avoid dividing by zero on the very first callback.
    downspeed = (transferred / elapsed) / 1024.0 if elapsed > 0 else 0.0

    sys.stdout.write("Complete: %s - Downloaded: %.2fKb - Speed: %.3fkb/s\r"
                     % (percent_text, progress, downspeed))
    sys.stdout.flush()

def main():
    """Download a file with ``urllib.urlretrieve`` while reporting progress.

    Prints a usage message and returns without downloading when no
    command-line argument was supplied.
    """
    if len(sys.argv) < 2:
        print("Need cnet.com link as argument, use cnet.com link")
        # Nothing to download without a link; previously execution fell
        # through to the urlretrieve call below.
        return
    # NOTE(review): finnalUrl and fileName are not defined in this snippet;
    # presumably they are derived from sys.argv[1] elsewhere -- confirm.
    urllib.urlretrieve(finnalUrl, fileName, transfer_progress)

#二、URL解码（例子为俄文编码后的解码显示）

# Percent-decode the sample into raw windows-1251 bytes, then turn those
# bytes into a unicode string so the Cyrillic text prints correctly.
string = urllib.unquote(
    '%CF%EB%EE%F5%EE%E9%20%EB%E5%E9%F2%E5%ED%E0%ED%F2'
)
conver = string.decode('windows-1251')
print(conver)

#三、urllib2带有cookie头的下载方式：

        # Method-body excerpt: ``self.opener`` and ``url`` come from the
        # enclosing method, which is not part of this snippet.
        # Request headers sent with the download, including a fixed Cookie.
        headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:6.0.2) Gecko/20100101 Firefox/6.0.2',
            'Referer': 'http://www.youtube.com/watch?v=_OFMkCeP6ok&feature=topvideos_music',
            'Accept': 'video/webm,video/ogg,video/*;q=0.9,application/ogg;q=0.7,audio/*;q=0.6,*/*;q=0.5',
            'Range': 'bytes=0-',
            'Cookie': 'VISITOR_INFO1_LIVE=tzu_phswJEQ',
        }
        req = urllib2.Request(url, None, headers)
        response = self.opener.open(req)
        try:
            print(response.headers)
            data = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()
        # Open the output only once the body has arrived, and always close it.
        with open('./abcdefg', 'wb') as f:
            f.write(data)

# ======================================================================

# ========================= Youtube下载例子 ==============================

# ======================================================================

import re
import urllib
import urllib2
import cookielib

class Youtube(object):
    """Small helper around ``urllib2`` for the Youtube HTML5 page and downloads.

    All requests go through ``self.opener``, which is built with an
    ``HTTPCookieProcessor`` bound to ``self.cookiejar``. ``self.cookies`` is a
    plain ``name -> value`` dict filled by ``GetCookies`` and
    ``LoginYoutubeHtml5``.
    """

    _USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:6.0.2) Gecko/20100101 Firefox/6.0.2'
    # Number of bytes read from the response per iteration in GetVideo.
    _CHUNK_SIZE = 64 * 1024

    def __init__(self):
        self.html5URL = 'http://www.youtube.com/html5'
        # Cookie name -> value.
        self.cookies = {}
        # XSRF token scraped by LoginYoutubeHtml5(); empty until then.
        self.token = ''
        self.cookiejar = cookielib.CookieJar()
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookiejar))

    def GetCookies(self, string):
        """Store the cookies found in a raw header block in ``self.cookies``.

        Args:
            string: Header text whose lines are separated by ``\\r\\n``.

        For each ``Set-Cookie`` line only the leading ``name=value`` pair is
        kept; the attributes after the first ``'; '`` are ignored. Lines
        without a ``name=value`` pair are skipped.
        """
        for line in string.split('\r\n'):
            field, sep, content = line.partition(': ')
            # Header names are case-insensitive.
            if not sep or field.lower() != 'set-cookie':
                continue
            pair = content.split('; ')[0]
            name, eq, value = pair.partition('=')
            if eq and name:
                self.cookies[name] = value
        print('cookies are :  %s' % (self.cookies,))

    def GetCookiesString(self):
        """Return ``self.cookies`` as ``'name=value; name2=value2'``.

        Returns an empty string when no cookies are stored.
        """
        return '; '.join('%s=%s' % (name, value)
                         for name, value in self.cookies.items())

    def LoginYoutubeHtml5(self):
        """Fetch the HTML5 page and record its XSRF token and cookies.

        ``self.token`` is set to the first ``'XSRF_TOKEN': '...'`` value found
        in the page (left unchanged when none is found), and every cookie in
        ``self.cookiejar`` is copied into ``self.cookies``.
        """
        headers = {'User-Agent': self._USER_AGENT}
        req = urllib2.Request(self.html5URL, None, headers)
        print('LoginYoutubeHtml5 ===req=== is %s' % (req,))
        response = self.opener.open(req)
        try:
            html_code = response.read()
        finally:
            response.close()
        match = re.search(r"'XSRF_TOKEN':\s*?'(\w+)'", html_code)
        if match:
            self.token = match.group(1)
        for item in self.cookiejar:
            self.cookies[item.name] = item.value
        print('LoginYoutubeHtml5 ===self.cookies=== is %s' % (self.cookies,))
        print('LoginYoutubeHtml5 ===self.token=== is:  %s' % (self.token,))

    def Post2UseHtml5Video(self):
        """POST ``enable_html5=true`` with ``self.token`` to the HTML5 page.

        Prints the cookie jar and the response headers; the response body is
        not read.
        """
        headers = {'User-Agent': self._USER_AGENT, 'Referer': self.html5URL}
        values = {'enable_html5': 'true', 'session_token': self.token}
        data = urllib.urlencode(values)
        req = urllib2.Request(self.html5URL, data, headers)
        response = self.opener.open(req)
        try:
            print('Post2UseHtml5Video ===self.cookies=== is %s' % (self.cookiejar,))
            print('Post2UseHtml5Video ===response.headers=== is %s' % (response.headers,))
        finally:
            response.close()

    def GetVideo(self, url, referer, output_path='./abcdefg'):
        """Download ``url`` to ``output_path`` and print the response headers.

        Args:
            url: Address of the resource to download.
            referer: Currently unused; kept so existing callers keep working.
                NOTE(review): probably meant to fill a ``Referer`` header --
                confirm before wiring it in.
            output_path: Destination file, opened in binary write mode.
                Defaults to the previously hard-coded ``'./abcdefg'``.

        The request carries a fixed ``Range`` and ``Cookie`` header. The body
        is copied in chunks so it is never held in memory as a whole, and
        both the response and the file are closed even when an error occurs.
        """
        headers = {
            'Range': 'bytes=0-',
            'Cookie': 'VISITOR_INFO1_LIVE=up3mRz6yRP4',
        }
        req = urllib2.Request(url, None, headers)
        response = self.opener.open(req)
        try:
            print('\nGetVideo ===response.headers=== is:  %s' % (response.headers,))
            with open(output_path, 'wb') as out:
                while True:
                    chunk = response.read(self._CHUNK_SIZE)
                    if not chunk:
                        break
                    out.write(chunk)
        finally:
            response.close()

if __name__ == '__main__':
    # Prompt for the media URL first, then for the referring page URL.
    video_url = raw_input('Please entry youtube html5 download url:')
    referer_url = raw_input('Please entry youtube html5 referer url:')
    # LoginYoutubeHtml5() and Post2UseHtml5Video() are deliberately not
    # called here; only the download itself is performed.
    Youtube().GetVideo(video_url, referer_url)