Python3的大文件分片下载(asyncio协程版)

本篇博客介绍了一个使用Python3实现的大文件分片下载程序。该程序通过异步IO技术,能够将大文件分割成多个片段进行并发下载,并在下载完成后合并成完整文件。代码中详细展示了如何利用aiohttp进行网络请求,以及如何管理和合并临时文件。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

Python3的大文件分片下载

源代码

import asyncio
import os
from urllib.parse import urljoin

import aiohttp
from tqdm import tqdm

class bigfile_download:
    """Download one large file as HTTP byte-range fragments and merge them.

    Typical call order: ``get_content_length_from_net`` ->
    ``calculation_fragment`` -> ``fragment_down_all`` ->
    ``fragment_down_check`` -> ``fragment_merge``.

    Each entry of ``mtd_list`` is a tuple
    ``(fragment_number, start, end, headers, total_size)`` where ``start`` and
    ``end`` are inclusive byte offsets, ``headers`` carries the matching
    ``Range`` header and ``total_size`` is ``end - start + 1``.
    """

    # Status codes that are followed manually by ``fetch``.
    _REDIRECT_STATUSES = (301, 302, 303, 307, 308)

    def __init__(self, session, url, tmp_path='./down_cache', proxy=None, file_fragment_size = 1024 * 1024 * 2):
        """Store the download settings and make sure the cache directory exists.

        Args:
            session: client session object exposing awaitable ``get``/``head``.
            url: URL of the file to download.
            tmp_path: directory that receives the per-fragment files.
            proxy: optional proxy passed through to every request.
            file_fragment_size: size in bytes of each fragment.
        """
        self.url = url
        self.session = session
        self.proxy = proxy
        # Use only the last path segment; a query string or URL fragment must
        # not leak into the local file name.
        name = url.split('#', 1)[0].split('?', 1)[0].split('/')[-1]
        self.filename = name or 'download'
        self.filesize = 0
        self.mtd_list = []
        self.tmp_path = tmp_path
        self.file_fragment_size = file_fragment_size

        self.__mkdir(tmp_path)

    def __mkdir(self, path):
        """Create ``path``; return True if it was created, False if it existed."""
        # Try-then-handle instead of exists()-then-create avoids a race when
        # another process creates the directory in between.
        try:
            os.makedirs(path)
        except FileExistsError:
            print(path + ' 目录已存在')
            return False
        print(path+' 创建成功')
        return True

    def _fragment_path(self, fragment_n):
        """Return the cache file path used for fragment number ``fragment_n``."""
        return os.path.join(self.tmp_path, '{0}.{1}'.format(self.filename, fragment_n))

    @staticmethod
    def _report_failures(done_tasks):
        """Retrieve and print exceptions of finished download tasks."""
        for task in done_tasks:
            if task.cancelled():
                continue
            exc = task.exception()
            if exc is not None:
                print('fragment download failed: {0!r}'.format(exc))

    async def fetch(self, url, method='get', headers = None, retryCount = 3):
        """Issue a GET/HEAD request, following redirects by hand.

        Args:
            url: URL to request.
            method: ``'head'`` for a HEAD request, anything else means GET.
            headers: optional request headers.
            retryCount: maximum number of requests to issue while following
                redirects.

        Returns:
            The first non-redirect response (still open; the caller closes it).

        Raises:
            RuntimeError: if a redirect has no ``Location`` header or the
                redirect chain is longer than ``retryCount`` requests.
        """
        request = self.session.head if method == 'head' else self.session.get
        curr_url = url
        for _ in range(retryCount):
            r = await request(curr_url, proxy=self.proxy, headers=headers)
            if r.status not in self._REDIRECT_STATUSES:
                return r
            location = r.headers.get('Location')
            r.close()
            if not location:
                raise RuntimeError('redirect without Location header: {0}'.format(curr_url))
            # Location may be relative to the URL that produced the redirect.
            curr_url = urljoin(str(curr_url), location)
        # Never hand an already-closed redirect response back to the caller.
        raise RuntimeError('too many redirects for {0}'.format(url))

    async def get_content_length_from_net(self):
        """Read the remote file size via HEAD and store it in ``self.filesize``.

        Raises:
            KeyError: if the response has no ``Content-Length`` header.
        """
        r = None  # bound before ``try`` so ``finally`` never sees an unbound name
        try:
            r = await self.fetch(self.url, 'head')
            # A 4xx/5xx answer would otherwise yield the error page's length.
            r.raise_for_status()
            self.filesize = int(r.headers['Content-Length'])
        finally:
            if r is not None:
                r.close()

        print("filesize = {0}".format(self.filesize))

    async def calculation_fragment(self):
        """Split ``self.filesize`` bytes into fragments and rebuild ``mtd_list``.

        Raises:
            ValueError: if ``file_fragment_size`` is not positive.
        """
        step = self.file_fragment_size
        if step <= 0:
            raise ValueError('file_fragment_size must be positive')
        filesize = self.filesize

        fragments = []
        for fragment_n, start in enumerate(range(0, filesize, step), start=1):
            # HTTP byte ranges are inclusive, so the last valid offset is
            # filesize - 1.
            end = min(start + step, filesize) - 1
            headers = {'Range': 'bytes={0}-{1}'.format(start, end)}
            fragments.append((fragment_n, start, end, headers, end - start + 1))
        # Rebuilt from scratch so calling this twice does not duplicate entries.
        self.mtd_list = fragments

        print("file_fragment_num = {0}".format(len(self.mtd_list)))

    async def fragment_down(self, mtd):
        """Download one fragment into its cache file unless already complete.

        Args:
            mtd: one ``mtd_list`` entry.

        Raises:
            RuntimeError: if the server does not answer with the requested
                partial content.
        """
        fragment_n, _start, _end, headers, total_size = mtd
        target_filename = self._fragment_path(fragment_n)
        if os.path.exists(target_filename):
            if os.path.getsize(target_filename) == total_size:
                return
            # A size mismatch means a partial or stale file: start over.
            os.remove(target_filename)

        r = None
        pbar = tqdm(desc='task{0}'.format(fragment_n), total=total_size, leave=False)
        try:
            r = await self.fetch(self.url, headers=headers)
            # 206 is the expected answer. A plain 200 is only acceptable when
            # this single fragment is the entire file.
            whole_file = r.status == 200 and total_size == self.filesize
            if r.status != 206 and not whole_file:
                raise RuntimeError(
                    'unexpected HTTP status {0} for fragment {1}'.format(r.status, fragment_n))
            with open(target_filename, 'wb') as f:
                async for chunk, _ in r.content.iter_chunks():
                    f.write(chunk)
                    pbar.update(len(chunk))
        finally:
            pbar.close()
            if r is not None:
                r.close()

    async def fragment_down_all(self, taskPoolMaxNum):
        """Download every fragment with at most ``taskPoolMaxNum`` tasks at once.

        Failures of individual fragments are printed, not raised; callers are
        expected to run ``fragment_down_check`` afterwards.
        """
        limit = max(1, taskPoolMaxNum)
        pending = set()

        for mtd in self.mtd_list:
            if len(pending) >= limit:
                done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
                self._report_failures(done)
            pending.add(asyncio.ensure_future(self.fragment_down(mtd)))
        # asyncio.wait() rejects an empty set, e.g. when there are no fragments.
        if pending:
            done, _ = await asyncio.wait(pending)
            self._report_failures(done)

    async def fragment_merge(self, target_path=None):
        """Concatenate all fragments, in order, into the final file.

        Args:
            target_path: directory for the merged file; ``None`` writes it
                under the bare file name (relative to the current directory).
        """
        if target_path is None:
            target_filename = self.filename
        else:
            target_filename = os.path.join(target_path, self.filename)

        with open(target_filename, 'wb') as newfile:
            for mtd in self.mtd_list:
                with open(self._fragment_path(mtd[0]), 'rb') as fragment_file:
                    # Each fragment is at most file_fragment_size bytes.
                    newfile.write(fragment_file.read())
        print('fragment_merge end!')

    async def fragment_down_check(self):
        """Return True only if every fragment file exists with the right size."""
        for mtd in self.mtd_list:
            target_filename = self._fragment_path(mtd[0])
            if not os.path.exists(target_filename):
                print('fragment_down_check error2!')
                return False
            if os.path.getsize(target_filename) != mtd[4]:
                print('fragment_down_check error1!')
                return False

        print('fragment_down_check ok!')
        return True

async def download_bigfile(url, proxy=None, *, file_fragment_size=1024 * 1024 * 2,
                           task_num_max=5, tmp_path='./down_cache',
                           target_path='./', max_rounds=9):
    """Download ``url`` in fragments and merge them into ``target_path``.

    Args:
        url: URL of the file to download.
        proxy: optional proxy passed to every request.
        file_fragment_size: size in bytes of each fragment.
        task_num_max: maximum number of concurrent fragment downloads.
        tmp_path: directory used for the fragment cache files.
        target_path: directory that receives the merged file.
        max_rounds: maximum number of download rounds before giving up.

    Returns:
        True if all fragments were verified and merged, False otherwise.
    """
    async with aiohttp.ClientSession() as session:
        bd = bigfile_download(session, url, tmp_path=tmp_path, proxy=proxy,
                              file_fragment_size=file_fragment_size)
        await bd.get_content_length_from_net()
        await bd.calculation_fragment()
        for _ in range(max_rounds):
            if await bd.fragment_down_check():
                await bd.fragment_merge(target_path)
                return True
            await bd.fragment_down_all(task_num_max)
        # The last round may have completed the download; verify once more so
        # a finished download is not silently left unmerged.
        if await bd.fragment_down_check():
            await bd.fragment_merge(target_path)
            return True
        return False

async def main():
    """Entry coroutine: download the sample MSYS2 installer."""
    await download_bigfile(
        url='https://repo.msys2.org/distrib/x86_64/msys2-x86_64-20210228.exe',
    )
    
if __name__ == '__main__':
    # asyncio.run() creates a fresh event loop, runs main() to completion and
    # closes the loop afterwards; asyncio.get_event_loop() without a running
    # loop is deprecated and left the loop unclosed.
    asyncio.run(main())
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值