这是一段校验下载文件完整性的代码:从 URL 的文件名中取出 SHA-1 值,下载文件后重新计算哈希并与之比对。其中用到了简单的多线程(10 个线程并发执行)以及锁的概念。
代码如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#20180422
import urllib
import re
import hashlib
import os
import threading
def get_url_info(url):
    """Download *url* and check the file against the SHA-1 embedded in its name.

    The expected digest is the text between the last ``_`` and the last ``.``
    of the URL's file name, for example
    ``.../windows6.1-kb2729452-ia64_dd935fb81a90431565a39a36f808f023fe9653fd.psf``.
    The file is downloaded to ``<cwd>/<digest>.tmp``, hashed in chunks, and
    the temporary file is always removed again.

    Args:
        url: URL to verify. Surrounding whitespace (such as the newline left
            by ``readline()``) is ignored.

    Returns:
        A dict with the keys ``status``, ``Sha1_url`` (digest taken from the
        URL), ``Sha1`` and ``MD5`` (hex digests of the downloaded data; empty
        strings when nothing was hashed). ``status`` is one of:

        * 1 -- URL error: no digest in the file name, or the download failed
        * 2 -- file error: the computed SHA-1 differs from the one in the URL
        * 3 -- file normal: the computed SHA-1 matches the one in the URL

        (0, "unknown", is reserved and never returned.)
    """
    # Function-scope import: urlretrieve lives in ``urllib`` on Python 2 and
    # in ``urllib.request`` on Python 3.
    try:
        from urllib import urlretrieve
    except ImportError:
        from urllib.request import urlretrieve

    url = url.strip()

    # Only look at the last path segment so that an underscore or dot earlier
    # in the URL (host name, directories) cannot be mistaken for the digest.
    file_name = url.rsplit('/', 1)[-1]
    stem, dot, _extension = file_name.rpartition('.')
    _prefix, underscore, sha1_url = stem.rpartition('_')
    if not (dot and underscore):
        # No "<name>_<digest>.<ext>" shape: report it instead of crashing.
        print('url error')
        return {'status': 1, 'Sha1_url': '', 'Sha1': '', 'MD5': ''}

    # NOTE: the temp name depends only on the digest, so two concurrent calls
    # for the same digest would share one temp file.
    file_temp = os.path.join(os.getcwd(), sha1_url + '.tmp')
    hash_sha1_obj = hashlib.sha1()
    hash_md5_obj = hashlib.md5()
    try:
        try:
            urlretrieve(url, file_temp)
        except Exception:
            # Any download failure is reported through the status code;
            # KeyboardInterrupt / SystemExit still propagate.
            print('url error')
            return {'status': 1, 'Sha1_url': sha1_url, 'Sha1': '', 'MD5': ''}

        with open(file_temp, 'rb') as downloaded:
            # Hash in fixed-size chunks so large files never sit in memory.
            for data in iter(lambda: downloaded.read(8096), b''):
                hash_sha1_obj.update(data)
                hash_md5_obj.update(data)
    finally:
        # Clean up on every path, including a failed or partial download.
        if os.path.exists(file_temp):
            os.remove(file_temp)

    hash_md5 = hash_md5_obj.hexdigest()
    hash_sha1 = hash_sha1_obj.hexdigest()
    error_code = 3 if hash_sha1.lower() == sha1_url.lower() else 2
    return {"status": error_code, "Sha1_url": sha1_url, "Sha1": hash_sha1, "MD5": hash_md5}
class UrlCheckThread(threading.Thread):
    """Worker thread that verifies URLs read from a shared file object.

    Several workers are given the same open file and the same lock. The lock
    serialises both fetching the next line and printing a result, so lines
    are handed out once each and output from different workers does not
    interleave.
    """

    def __init__(self, lock, f):
        """Remember the shared lock and URL-list file.

        Args:
            lock: lock object shared by all workers; used as a context manager.
            f: open file-like object with one URL per line.
        """
        threading.Thread.__init__(self)
        self.f = f
        self.lock = lock

    def run(self):
        """Check URLs one at a time until the shared file is exhausted."""
        while True:
            with self.lock:
                # Read from the file passed to the constructor rather than
                # from whatever module-level ``f`` happens to exist.
                line = self.f.readline()
            if not line:
                break  # readline() returns '' only at end of file
            url = line.strip()
            if not url:
                continue  # skip blank lines instead of treating them as URLs
            dic_ret = get_url_info(url)
            with self.lock:
                # URL and result on one line, followed by an empty line.
                print('%s %s \n' % (url, dic_ret))
if __name__ == '__main__':
    # Script entry point: verify every URL listed in url_list.txt (current
    # directory, one URL per line) using ten worker threads that share one
    # file handle and one lock.
    threadLock = threading.Lock()
    thread_list = []
    # Plain relative name: same file as '.\url_list.txt' on Windows, but
    # without a backslash escape and usable on other platforms too.
    with open('url_list.txt', 'r') as f:
        try:
            for i in range(10):
                thread = UrlCheckThread(threadLock, f)
                thread.start()
                thread_list.append(thread)
        finally:
            # Wait for every started worker before the file is closed, even
            # if starting one of them failed.
            for thread in thread_list:
                thread.join()