Python 的 urllib 模块:学习过程中整理的部分实用方法

本文整理了 Python 2 中 urllib / urllib2 模块的几个实用用法:通过 urlretrieve 的回调函数显示下载进度;对 URL 编码的字符串进行解码(以 windows-1251 编码的俄文为例);使用 urllib2 发送带 Cookie 头的下载请求;最后给出一个完整的 Youtube HTML5 视频下载示例。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

import sys
import time
import urllib


#一、urlretrieve的进度显示

def transfer_progress(blocks_transfered, block_size, file_size):
    """Write a one-line download status to stdout.

    The signature matches the ``reporthook`` callback of
    ``urllib.urlretrieve``, which is how ``main`` uses it.  The line ends
    with a carriage return so that successive calls overwrite each other.

    Args:
        blocks_transfered: Number of blocks received so far.
        block_size: Size of one block in bytes.
        file_size: Total size in bytes.  A non-positive value (urlretrieve
            reports -1 when the server gives no size) is treated as
            "unknown" and shown as ``?%``.

    The elapsed time is measured from the module-level ``starttime``.  If no
    caller has set it, the clock starts on the first report.
    """
    global starttime
    now = time.time()
    try:
        began = starttime
    except NameError:
        # Nobody initialised the start time; begin timing with this report.
        began = starttime = now

    # Work in floats from the start so the KB figure keeps its fraction
    # (plain int division would truncate it on Python 2).
    received = float(blocks_transfered * block_size)
    if file_size > 0:
        # The final block is usually only partly filled, so whole-block
        # arithmetic overshoots; cap at the real size to stay within 100%.
        received = min(received, float(file_size))
        percent_text = "%.0f%%" % (received * 100 / file_size)
    else:
        percent_text = "?%"

    elapsed = now - began
    # The first report can arrive with no measurable elapsed time.
    downspeed = received / elapsed / 1024 if elapsed > 0 else 0.0

    sys.stdout.write("Complete: %s - Downloaded: %.2fKb - Speed: %.3fkb/s\r"
                     % (percent_text, received / 1024, downspeed))
    sys.stdout.flush()

def main():
    """Download a file with ``urllib.urlretrieve`` while showing progress.

    Expects at least one command-line argument.  When none is given, a usage
    message is printed and the function returns without downloading.
    """
    if len(sys.argv) < 2:
        print("Need cnet.com link as argument, use cnet.com link")
        # Without a link there is nothing to download; stop here instead of
        # falling through into the download call.
        return
    # NOTE(review): finnalUrl and fileName are not defined in the visible
    # part of this file; presumably they are derived from sys.argv[1] in
    # code that was omitted from the article -- confirm before running.
    urllib.urlretrieve(finnalUrl, fileName, transfer_progress)


#二、URL解码(例子为俄文编码后的解码显示)

string = '%CF%EB%EE%F5%EE%E9%20%EB%E5%E9%F2%E5%ED%E0%ED%F2'
string = urllib.unquote(string)
conver = string.decode('windows-1251')

print conver


#三、urllib2带有cookie头的下载方式:

        headers = {
                             'User-Agent' : 'Mozilla/5.0 (X11; Linux x86_64; rv:6.0.2) Gecko/20100101 Firefox/6.0.2',
                             'Referer' : 'http://www.youtube.com/watch?v=_OFMkCeP6ok&feature=topvideos_music',
                             'Accept'  : 'video/webm,video/ogg,video/*;q=0.9,application/ogg;q=0.7,audio/*;q=0.6,*/*;q=0.5',Oq3MNJezCok
                              'Range' : 'bytes=0-',
                             'Cookie' : 'VISITOR_INFO1_LIVE=tzu_phswJEQ'}
        f = file('./abcdefg', 'wb')
        req = urllib2.Request(url, None, headers)
        response = self.opener.open(req)
        print  response.headers
        data = response.read()
        f.write(data)



# ======================================================================

# ========================= Youtube下载例子 ==============================

# ======================================================================


import re
import urllib
import urllib2
import cookielib


class Youtube(object):
    """Download a YouTube HTML5 video stream through a cookie-aware opener.

    The instance owns a ``cookielib.CookieJar`` and a ``urllib2`` opener
    bound to it, so cookies set by one request are sent with later ones.
    """

    # Browser identification sent with the HTML5 opt-in requests.
    _USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:6.0.2) Gecko/20100101 Firefox/6.0.2'
    # Number of bytes copied per read while saving a video.
    _CHUNK_SIZE = 64 * 1024

    def __init__(self):
        # Page used both to scrape the XSRF token and to post the opt-in form.
        self.html5URL = 'http://www.youtube.com/html5'
        # Cookie name -> value; filled by GetCookies() and LoginYoutubeHtml5().
        self.cookies = {}
        # XSRF token found by LoginYoutubeHtml5(); empty until then.
        self.token = ''
        self.cookiejar = cookielib.CookieJar()
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookiejar))

    def GetCookies(self, string):
        """Collect cookies from raw response-header text into ``self.cookies``.

        Args:
            string: Header text whose lines are separated by ``\\r\\n``.

        Every ``Set-Cookie`` line contributes its leading ``name=value``
        pair; attributes after the first ``;`` (path, domain, ...) are
        ignored.  Lines that are not ``Set-Cookie`` headers, and pairs
        without a name or ``=``, are skipped.
        """
        for line in string.split('\r\n'):
            # Split once only, so a value containing ': ' is not discarded.
            field = line.split(': ', 1)
            if len(field) != 2 or field[0].lower() != 'set-cookie':
                continue
            pair = field[1].split(';', 1)[0].strip()
            name, sep, value = pair.partition('=')
            if sep and name:
                self.cookies[name] = value
        print('cookies are :  %s' % (self.cookies,))

    def GetCookiesString(self):
        """Return ``self.cookies`` formatted as a ``Cookie`` header value.

        Returns:
            ``name=value`` pairs joined by ``'; '`` with no trailing
            separator; an empty string when there are no cookies.
        """
        return '; '.join('%s=%s' % (name, value)
                         for name, value in self.cookies.items())

    def LoginYoutubeHtml5(self):
        """Fetch the HTML5 page and record its XSRF token and cookies.

        Stores the first ``'XSRF_TOKEN': '...'`` match from the page in
        ``self.token`` (left unchanged when there is no match) and copies
        every cookie in the jar into ``self.cookies``.
        """
        headers = {'User-Agent': self._USER_AGENT}
        req = urllib2.Request(self.html5URL, None, headers)
        print('LoginYoutubeHtml5 ===req=== is %s' % (req,))
        reponse = self.opener.open(req)
        try:
            html_code = reponse.read()
        finally:
            # Release the connection even if reading fails.
            reponse.close()
        token = re.findall(r'\'XSRF_TOKEN\':\s*?\'(\w+)\'', html_code)
        if token:
            self.token = token[0]
        for item in self.cookiejar:
            self.cookies[item.name] = item.value
        print('LoginYoutubeHtml5 ===self.cookies=== is %s' % (self.cookies,))
        print('LoginYoutubeHtml5 ===self.token=== is:  %s' % (self.token,))

    def Post2UseHtml5Video(self):
        """POST the HTML5 opt-in form using the token held in ``self.token``."""
        headers = {'User-Agent': self._USER_AGENT, 'Referer': self.html5URL}
        values = {'enable_html5': 'true', 'session_token': self.token}
        data = urllib.urlencode(values)
        req = urllib2.Request(self.html5URL, data, headers)
        response = self.opener.open(req)
        try:
            print('Post2UseHtml5Video ===self.cookies=== is %s' % (self.cookiejar,))
            print('Post2UseHtml5Video ===response.headers=== is %s' % (response.headers,))
        finally:
            response.close()

    def GetVideo(self, url, referer, path='./abcdefg'):
        """Download ``url`` and save the response body to ``path``.

        Args:
            url: Direct URL of the video stream.
            referer: Accepted for interface compatibility; it is not sent
                with the request.
            path: Destination file; defaults to ``./abcdefg``.

        Only ``Range`` and a fixed visitor ``Cookie`` header are sent.  The
        request is opened before the file, so a failed request does not
        leave an empty file behind.
        """
        headers = {
            'Range': 'bytes=0-',
            'Cookie': 'VISITOR_INFO1_LIVE=up3mRz6yRP4',
        }
        req = urllib2.Request(url, None, headers)
        response = self.opener.open(req)
        try:
            print('\nGetVideo ===response.headers=== is:  %s' % (response.headers,))
            # Copy in chunks so a large video is never held in memory whole.
            with open(path, 'wb') as f:
                while True:
                    data = response.read(self._CHUNK_SIZE)
                    if not data:
                        break
                    f.write(data)
        finally:
            response.close()


if __name__ == '__main__':
    # Ask for the direct stream URL and the page that referred to it.
    video_url = raw_input('Please entry youtube html5 download url:')
    referer_url = raw_input('Please entry youtube html5 referer url:')
    # The HTML5 opt-in handshake (LoginYoutubeHtml5 followed by
    # Post2UseHtml5Video) is deliberately not run; only the download is.
    downloader = Youtube()
    downloader.GetVideo(video_url, referer_url)
    
   


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值