使用期物futures处理并发
依序下载图片
import os
import sys
import time
import requests
# Two-letter country codes of the flags to download (20 entries).
POP20_CC = 'CN IN US ID BR PK NG BD RU JP MX PH VN ET EG DE IR TR CD FR'.split()

# Root URL; each flag is fetched from <BASE_URL>/<cc>/<cc>.gif.
BASE_URL = 'http://flupy.org/data/flags'

# Local directory that save_flag() writes into.
DEST_DIR = 'downloads/'
def save_flag(img, filename):
    """Write the raw image bytes ``img`` to ``DEST_DIR/filename``.

    Args:
        img: Image content as bytes (the file is opened in binary mode).
        filename: Name of the file to create inside ``DEST_DIR``.

    Raises:
        OSError: If the directory cannot be created or the file cannot
            be written.
    """
    # Create the target directory on demand so the first download does not
    # fail with FileNotFoundError when ``DEST_DIR`` is missing.
    os.makedirs(DEST_DIR, exist_ok=True)
    path = os.path.join(DEST_DIR, filename)
    with open(path, 'wb') as fp:
        fp.write(img)
def get_flag(cc):
    """Download the flag image for country code ``cc`` and return its bytes.

    Args:
        cc: Country code; it is lower-cased to build the URL
            ``BASE_URL/<cc>/<cc>.gif``.

    Returns:
        The response body as bytes.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an
            error status (4xx/5xx).
        requests.exceptions.RequestException: On connection failures or
            when the timeout expires.
    """
    url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=cc.lower())
    # Without a timeout, requests may block indefinitely on a stalled server.
    resp = requests.get(url, timeout=10)
    # Fail loudly instead of handing an error page to the caller as image data.
    resp.raise_for_status()
    return resp.content
def show(text):
    """Print ``text`` followed by a space and flush stdout right away.

    Flushing makes each item visible on the command line as soon as it is
    printed, so progress shows up after every finished download.
    """
    print(text, end=' ', flush=True)
def download_many(cc_list):
    """Download the flags for ``cc_list`` one after another.

    Codes are handled in sorted order: the image is fetched, the code is
    echoed with ``show``, then the image is saved as ``<cc>.gif`` (lower case).

    Returns:
        The number of country codes in ``cc_list``.
    """
    ordered_codes = sorted(cc_list)
    for code in ordered_codes:
        data = get_flag(code)
        show(code)
        save_flag(data, '{}.gif'.format(code.lower()))
    return len(cc_list)
def main(download_many):
    """Run ``download_many`` on ``POP20_CC`` and print the elapsed time.

    Args:
        download_many: Callable that takes the list of country codes and
            returns the number of flags downloaded.
    """
    t0 = time.time()
    # Call the function passed in, not whichever global happens to carry the
    # same name at call time.
    count = download_many(POP20_CC)
    elapsed = time.time() - t0
    msg = '\n{} flags downloaded in {:.2f}s'
    print(msg.format(count, elapsed))
# Run the sequential version and print how long it took.
main(download_many)
使用concurrent.futures.ThreadPoolExecutor多线程下载
- executor.map():返回值是一个迭代器,迭代器的__next__方法调用各个期物的result方法,因此我们得到的是各个期物的结果,而非期物本身
# 复用上一个示例中的save_flag, get_flag, show, main 函数
from concurrent import futures
# Upper bound on the number of worker threads used by the pool.
MAX_WORKERS = 20
def download_one(cc):
    """Fetch, announce and store the flag for a single country code.

    Returns:
        The country code ``cc`` that was passed in.
    """
    filename = '{}.gif'.format(cc.lower())
    data = get_flag(cc)
    show(cc)
    save_flag(data, filename)
    return cc
def download_many(cc_list):
    """Download the flags for ``cc_list`` concurrently with a thread pool.

    The pool uses at most ``MAX_WORKERS`` threads, and never more than
    there are country codes.

    Args:
        cc_list: Sized collection of country codes.

    Returns:
        The number of flags downloaded (0 for an empty ``cc_list``).
    """
    if not cc_list:
        # ThreadPoolExecutor rejects max_workers=0 with ValueError.
        return 0
    workers = min(MAX_WORKERS, len(cc_list))
    with futures.ThreadPoolExecutor(workers) as executor:
        res = executor.map(download_one, sorted(cc_list))
    # Consuming the iterator re-raises any exception raised in a worker.
    return len(list(res))
# Run the thread-pool version through the same timing harness.
main(download_many)
深入理解期物
- map方法
from time import sleep, strftime
from concurrent import futures
def display(*args):
    """Print ``args`` prefixed with the current time as ``[HH:MM:SS]``.

    The timestamp and the arguments go out in a single ``print`` call, so
    the prefix stays with its message. With no arguments only the timestamp
    is printed (without a trailing space).
    """
    print(strftime('[%H:%M:%S]'), *args)
def loiter(n):
    """Sleep for ``n`` seconds, reporting start and end through ``display``.

    Both messages are indented with ``n`` tab characters.

    Returns:
        ``n * 10``.
    """
    indent = '\t' * n
    display('{}loiter({}): doing nothing for {}s...'.format(indent, n, n))
    sleep(n)
    display('{}loiter({}): done'.format(indent, n))
    return n * 10
def main():
    """Run ``loiter(0)`` .. ``loiter(4)`` on a 3-thread pool via ``executor.map``.

    Displays the object returned by ``map`` and then each result, with its
    index, as the iterator yields it.
    """
    display('Script starting.')
    # The context manager shuts the pool down when the block exits.
    with futures.ThreadPoolExecutor(max_workers=3) as executor:
        results = executor.map(loiter, range(5))
        display('results:', results)
        display('Waiting for individual results:')
        for i, result in enumerate(results):
            display('result {}: {}'.format(i, result))
# Run the executor.map demonstration.
main()
- 把executor.map方法换成executor.submit方法和futures.as_completed函数
def download_many(cc_list):
    """Download at most five flags using ``submit`` and ``as_completed``.

    Only the first five entries of ``cc_list`` are used, on a pool of three
    worker threads. Each scheduled future and each finished result is printed.

    Returns:
        The number of results collected.
    """
    subset = sorted(cc_list[:5])
    with futures.ThreadPoolExecutor(max_workers=3) as executor:
        pending = []
        for code in subset:
            # submit() schedules the callable and returns a future.
            fut = executor.submit(download_one, code)
            pending.append(fut)
            print('Scheduled for {}: {}'.format(code, fut))

        outcomes = []
        for done in futures.as_completed(pending):
            # result() gives the value returned by the callable.
            value = done.result()
            print('{} result: {!r}'.format(done, value))
            outcomes.append(value)
    return len(outcomes)
- executor.submit()方法:传入一个可调用对象,为其排期执行,并返回一个期物
- concurrent.futures.as_completed函数:参数是一个期物列表,返回值是一个迭代器,在期物运行结束后产出期物
- .result()方法:在期物运行结束后调用,返回可调用对象的结果,或重新抛出执行时的异常。如有需要,可在这一步处理可能出现的异常,例如示例中的requests.exceptions.HTTPError、requests.exceptions.ConnectionError。
使用TQDM包显示下载进度
tqdm是第三方包,需要下载
import time
from tqdm import tqdm
# Wrap the iterable in tqdm so a progress bar is drawn while looping.
for _ in tqdm(range(100)):
    time.sleep(0.01)
总结
- 并发代码的重构:把依次执行的for循环体改成函数,以便并发调用,这里改为了download_one。
- 使用sys.stdout.flush在命令行中刷新打印内容,以显示进度。
- tqdm模块实现文本动画进度条,处理的是可迭代对象,使用len函数确定可迭代对象大小,以显示进度条和预计的剩余时间。
- 处理并发的简单方法,使用期物。
流畅的Python——第17章