import requests
import threading
import time
import re
import os
# URL of any single page of the target directory. Only the part before the
# page number and the file extension are reused; the page number itself is
# discarded.
url = """
https://example.com/galleries/1712777/3.jpg
"""
# Maximum number of downloads allowed in flight at the same time.
treadNum = 4
# Groups: (1) everything before the numeric directory, (2) the numeric
# directory segment, (3) the file extension including the leading dot.
ma = re.search(r'(https.*)/(\d+)/\d+(\.\S+)',url)
folder = "F:/manga/"
filetype = ".jpg"
if ma is None:
    # Without a match `prefix` would never be bound: every worker would then
    # die with NameError before releasing its slot and the scheduler would
    # wait forever. Fail fast with a clear message instead.
    raise ValueError(
        "url does not look like https://.../<digits>/<digits>.<ext>: %r" % url
    )
# Base URL that page file names ("1.jpg", "2.jpg", ...) are appended to.
prefix = ma.group(1) + "/" + ma.group(2) + "/"
# Pages are stored in a sub-folder named after the numeric directory segment.
folder += ma.group(2) + "/"
filetype = ma.group(3)
# exist_ok avoids the check-then-create race of os.path.exists + makedirs.
os.makedirs(folder, exist_ok=True)
# Set by download_jpg once a page past the first few answers with an error
# status; lets later workers skip pages beyond lastPictureNumber.
flag404 = False
# Upper bound on page numbers to request; download_jpg lowers it when it
# hits an error status.
lastPictureNumber = 500
# Seconds to wait for the server before giving up on a page; without a
# timeout a stalled connection would hold its slot forever.
_REQUEST_TIMEOUT = 30

# Serialises updates of flag404 / lastPictureNumber across worker threads.
_state_lock = threading.Lock()


def download_jpg(cur, arr):
    """Download page number ``cur`` and save it into ``folder``.

    The page URL is ``prefix + str(cur) + filetype`` and the file is written
    to ``folder + str(cur) + filetype``.

    Args:
        cur: Page number to fetch.
        arr: Shared list used as a slot counter by the scheduler. Exactly one
            entry is popped when this call finishes, whatever the outcome, so
            the scheduler can start another download.

    Returns:
        None. Progress is reported on stdout as ``start`` / ``pass`` /
        ``error`` / ``end`` / ``done`` lines.

    Side effects:
        When a page numbered above 5 answers with a status code above 300,
        ``flag404`` is set and ``lastPictureNumber`` is lowered to that page
        number (never raised), which makes later calls for higher pages
        return early with ``pass``. Responses with a status above 300 are
        never written to disk.
    """
    global flag404, lastPictureNumber
    print("%-7s|%3s" % ("start", cur))
    t1 = time.perf_counter()
    try:
        # A previous worker already found the end of the sequence.
        if flag404 and cur > lastPictureNumber:
            print("%-7s|%3s" % ("pass", cur))
            return

        jpg = str(cur) + filetype
        newurl = prefix + jpg
        try:
            f = requests.get(newurl, timeout=_REQUEST_TIMEOUT)
        except requests.RequestException:
            # Network-level failure (DNS, connection, timeout, ...).
            print("%-7s|%3s" % ("error", cur))
            return
        t2 = time.perf_counter()

        if int(f.status_code) > 300:
            # The first few pages are not trusted as an end marker, but their
            # error body must still not be saved as if it were an image.
            if cur > 5:
                with _state_lock:
                    # Keep the smallest failing page: workers finish out of
                    # order, so a later page must not overwrite an earlier one.
                    lastPictureNumber = min(lastPictureNumber, cur)
                    flag404 = True
            print("%-7s|%3s|%6.3fs|%3s" % ("end", cur, t2 - t1, f.status_code))
            return

        with open(folder + jpg, "wb") as picture:
            picture.write(f.content)
        t3 = time.perf_counter()
        print("%-7s|%3s|%6.3fs|%3s" % ("done", cur, t3 - t1, f.status_code))
    finally:
        # Always release the slot, even on an unexpected exception; otherwise
        # the scheduler would eventually run out of slots and wait forever.
        arr.pop()
if __name__ == '__main__':
    # Scheduler: starts one thread per page number, keeping at most
    # `treadNum` downloads in flight.
    print("Task Start")
    # Shared slot list: one entry is appended per started download and
    # download_jpg pops one entry when it finishes, so len(ths) is the
    # number of downloads currently running.
    ths = []
    # Number of pages scheduled so far; the next page to start is i + 1.
    i = 0
    # lastPictureNumber is re-read on every pass because workers lower it
    # when they hit an error status. Using `<` stops after page
    # lastPictureNumber itself has been scheduled; the previous
    # `i > lastPictureNumber` exit test scheduled one page too many.
    while i < lastPictureNumber:
        if len(ths) < treadNum:
            ths.append(i + 1)
            th = threading.Thread(target=download_jpg, args=(i + 1, ths))
            th.start()
            i += 1
        if len(ths) >= treadNum:
            # All slots busy: poll again shortly instead of spinning.
            time.sleep(0.1)