import re
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Crawl the jandan.net "ooxx" board one page at a time and save every image
# whose ``src`` contains ".jpg" into the current working directory.
#
# Starting from ``url``, each iteration fetches a page, collects its image
# sources, picks the first not-yet-visited pagination link as the next page,
# and downloads the images.  The crawl stops when a page cannot be fetched or
# when no unvisited pagination link remains.

count = 1  # 1-based ordinal of the next image; used only for progress output
url = 'http://jandan.net/ooxx/page-1#comments'
url_used = {url}  # every page URL already scheduled, so no page is crawled twice
headers = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}

# Compiled once, outside the loop.  Raw strings keep ``\d`` and ``\.`` as
# regex escapes instead of (invalid) string escapes.
PAGE_LINK_RE = re.compile(r"^http://jandan\.net/ooxx/page-\d*#comments")
JPG_SRC_RE = re.compile(r".*\.jpg")

PAGE_TIMEOUT = 10  # seconds; without a timeout a stalled server hangs the crawl
IMAGE_TIMEOUT = 2  # seconds; same per-image limit the script has always used

while url is not None:
    page_url = url

    try:
        page = requests.get(page_url, headers=headers, timeout=PAGE_TIMEOUT)
        page.raise_for_status()
    except requests.RequestException as exc:
        # Without the page there is no way to discover the next link: stop.
        print('error: %s (%s)' % (page_url, exc))
        break

    soup = BeautifulSoup(page.text, 'html.parser')
    image_sources = [
        img['src'] for img in soup.find_all("img", {'src': JPG_SRC_RE})
    ]

    # Choose the first pagination link that has not been visited yet.
    # ``None`` means there is nowhere left to go and ends the loop after this
    # page's images have been handled.
    url = next(
        (
            link['href']
            for link in soup.find_all("a", {'href': PAGE_LINK_RE})
            if link['href'] not in url_used
        ),
        None,
    )
    if url is not None:
        print(url)
        url_used.add(url)

    for src in image_sources:
        # urljoin resolves protocol-relative ("//host/x.jpg"), absolute and
        # relative sources alike against the page they were found on.
        image_url = urljoin(page_url, src)
        try:
            response = requests.get(image_url, headers=headers, timeout=IMAGE_TIMEOUT)
            # Do not save an HTTP error body under a .jpg name.
            response.raise_for_status()
            print("正在下载第 %d 张" % count)  # "Downloading image #N"
            count += 1
            filename = src.rsplit('/', 1)[-1]
            with open(filename, 'wb') as f:
                f.write(response.content)
        except (requests.RequestException, OSError) as exc:
            # Best effort: report the failed image and carry on with the rest.
            print('error: %s (%s)' % (image_url, exc))