1、需求
有多个文件的下载链接,使用多进程或者多线程快速下载文件,落盘到本地,同时显示固件的下载进度。
2、两种方案对比
2.1、多线程 + GET + tqdm进度条
import requests,os
import threading
import random
import time
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, Future, as_completed, wait
from multiprocessing import cpu_count
# Firmware archives fetched by the demo; one download task is created per entry.
urls = [
    "https://bigota.d.miui.com/9.8.29/miui_MI5SPlus_9.8.29_339fb2abdc_8.0.zip",
    "https://bigota.d.miui.com/V12.0.2.0.PDCCNXM/miui_MI6X_V12.0.2.0.PDCCNXM_7c0078f6ae_9.0.zip",
    "https://download.pro.sony/21/03/aPaWpHqoiM/sony_pjupdate_2015_5003_pjs_auth.zip",
]
def download_file(url):
    """Stream ``url`` to a file in the current working directory with a tqdm bar.

    The local file name is the text after the last "/" of the URL. Progress is
    tracked in bytes actually written, so the bar stays accurate regardless of
    how the response is chunked.

    Args:
        url: HTTP(S) address of the file to download.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    print("------", "Start download with requests")
    name = url.split("/")[-1]
    path = os.path.join(os.getcwd(), name)
    # The with-block releases the connection even if writing fails midway.
    with requests.get(url, stream=True, timeout=30) as resp:
        # Do not save an HTML error page under the firmware's file name.
        resp.raise_for_status()
        # Content-Length is optional; without it tqdm shows a counter only.
        length = resp.headers.get("Content-Length")
        total = int(length) if length is not None else None
        content_size = total / 1024 if total is not None else "unknown"
        print("File path:%s, content_size:%s" % (path, content_size))
        print("\rFile %s, total size is: %s" % (name, content_size))
        with open(path, "wb") as file, tqdm(
            total=total,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            desc=name,
        ) as bar:
            for data in resp.iter_content(chunk_size=64 * 1024):
                file.write(data)
                bar.update(len(data))
    print("%s download ok" % name)
def test_tqdm():
    """Download every entry of ``urls`` concurrently using ``download_file``.

    All tasks are submitted before any of them is awaited so the downloads
    overlap. A failed download is reported on stdout instead of being
    silently discarded with its future.
    """
    # Pool size follows cpu_count(); the with-block shuts the pool down
    # once every submitted download has finished.
    with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
        # Map each future back to its URL so failures can be attributed.
        tasks = {executor.submit(download_file, url): url for url in urls}
        for task in as_completed(tasks):
            try:
                task.result()
            except Exception as exc:  # report and keep the other downloads going
                print("%s download failed: %r" % (tasks[task], exc))
# Run the tqdm-based download demo as soon as this script is executed.
test_tqdm()
展现形式:
2.2、多线程 + urlretrieve + 普通进度条
import requests,os
import threading
import random
import time
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, Future, as_completed, wait
from multiprocessing import cpu_count
from urllib import request
# Firmware archives fetched by the demo; one download task is created per entry.
urls = [
    "https://bigota.d.miui.com/9.8.29/miui_MI5SPlus_9.8.29_339fb2abdc_8.0.zip",
    "https://bigota.d.miui.com/V12.0.2.0.PDCCNXM/miui_MI6X_V12.0.2.0.PDCCNXM_7c0078f6ae_9.0.zip",
    "https://download.pro.sony/21/03/aPaWpHqoiM/sony_pjupdate_2015_5003_pjs_auth.zip",
]
# Start time of the transfer currently handled by each worker thread, so that
# concurrent downloads report their own elapsed time.
_cbk_state = threading.local()


def cbk(a, b, c):
    """Report hook for ``urlretrieve`` that prints a one-line text progress bar.

    Args:
        a: Number of blocks transferred so far (0 on the initial call).
        b: Size of one block in bytes.
        c: Total size of the remote file in bytes; zero or negative when
            the size is not known.
    """
    now = time.perf_counter()
    # The hook is first called with a == 0, which marks the start of a
    # transfer; elapsed time is measured from that point.
    if a == 0 or not hasattr(_cbk_state, "start"):
        _cbk_state.start = now
    dur = now - _cbk_state.start
    done = a * b
    if c <= 0:
        # No usable total: a percentage cannot be computed, show bytes only.
        print('\r' + '[下载进度]:%d bytes[%.2fs]' % (done, dur), end='')
        return
    # The final block usually overshoots the file size, so cap at 100%.
    fraction = min(done / c, 1.0)
    per = 100.0 * fraction
    print('\r' + '[下载进度]:%s%.2f%%[%.2fs]' % ('>' * int(50 * fraction), per, dur), end='')
    if fraction >= 1.0:
        # end='' above leaves the cursor on the bar; finish the line.
        print()
def download(url):
    """Retrieve ``url`` with ``urlretrieve``, reporting progress through ``cbk``.

    The local file name is the text after the last "/" of the URL.
    """
    target = url.rsplit("/", 1)[-1]
    request.urlretrieve(url, filename=target, reporthook=cbk)
def test_urlretrieve():
    """Download every entry of ``urls`` concurrently using ``download``.

    All tasks are submitted before any of them is awaited so the downloads
    overlap. A failed download is reported on stdout instead of being
    silently discarded with its future.
    """
    # Pool size follows cpu_count(); the with-block shuts the pool down
    # once every submitted download has finished.
    with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
        # Map each future back to its URL so failures can be attributed.
        tasks = {executor.submit(download, url): url for url in urls}
        for task in as_completed(tasks):
            try:
                task.result()
            except Exception as exc:  # report and keep the other downloads going
                print("%s download failed: %r" % (tasks[task], exc))
# Run the urlretrieve-based download demo as soon as this script is executed.
test_urlretrieve()
展现形式:
三个下载文件的进度条初始在同一行打印,后面自动分开。

该博客对比了两种多线程下载文件的方法,一种结合GET请求和tqdm库显示进度条,另一种使用urlretrieve和普通进度条。通过ThreadPoolExecutor实现多线程并行下载,每种方法都展示了下载进度。

4191

被折叠的 条评论
为什么被折叠?



