看到过很多个版本的多线程下载图片代码,自己改了改,主要改动是异常处理以及 urlretrieve 的使用。
import time
from multiprocessing.pool import ThreadPool
import os
import urllib
from urllib.request import urlretrieve
def download_image(url, our_dir):
    '''
    Download the image at ``url`` into ``our_dir`` unless it is already there.

    The local file name is the last path component of ``url``; ``.jpg`` is
    appended when that name does not already end in a known image suffix.

    :param url: address of the image; an empty or ``None`` value is skipped.
    :param our_dir: existing directory the image is stored in.
    :return: path of the saved (or previously saved) image, or ``None`` when
        ``url`` is empty or the download fails with a ``URLError``.
    '''
    if not url:
        return None

    basename = os.path.basename(url)
    # NOTE: the suffix test is case-sensitive and has no leading dot; this is
    # kept as-is so names of previously downloaded files stay stable.
    if basename.endswith(('jpg', 'png', 'jpeg', 'bmp')):
        print(basename + ':already has format')
    else:
        basename = basename + '.jpg'

    filename = os.path.join(our_dir, basename)

    # Check the local copy first: a file that is already on disk needs no
    # network access at all.
    if os.path.exists(filename):
        print(basename, 'is already exist in ', our_dir)
        return filename

    try:
        # urlretrieve opens the URL itself and raises URLError / HTTPError on
        # failure, so a separate urlopen() probe would only double the traffic
        # and leave an unclosed response behind.
        urlretrieve(url, filename)
    except urllib.error.URLError as e:
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
        # A truncated transfer (ContentTooShortError) leaves a partial file;
        # remove it so a later call does not mistake it for a finished image.
        if os.path.exists(filename):
            os.remove(filename)
        print("download image failed:{}".format(url))
        return None

    print('re download:', filename)
    return filename
def download_image_thread