# NOTE: this crawler is rather aggressive -- it downloads practically every logo image listed on https://logopond.com.
import requests
from requests.exceptions import RequestException
import re
import os
def get_one_page(url, timeout=10):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the crawl indefinitely.

    Returns:
        The decoded response text when the server answers with HTTP 200;
        ``None`` for any other status code or when the request fails
        (connection error, timeout, ...).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except RequestException:
        # Best-effort fetch: the caller treats None as "page unavailable".
        return None
    if response.status_code == 200:
        return response.text
    return None
def DownPage(file_name, photo_html, num, timeout=10):
    """Download one image and store it as ``<cwd>/<file_name>/<num>.png``.

    Args:
        file_name: Name of the target directory inside the current working
            directory. It must already exist.
        photo_html: URL of the image to download.
        num: Sequence number used as the output file's base name.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``num + 1`` (the next free sequence number) when the image was saved;
        ``None`` when the server did not answer with HTTP 200 or the request
        failed.
    """
    try:
        r = requests.get(photo_html, timeout=timeout)
    except RequestException:
        return None
    if r.status_code != 200:
        print('Error')
        return None
    # The extension is always ".png", whatever the real image format is.
    target = os.path.join(os.getcwd(), file_name, str(num) + '.png')
    # The context manager closes the file; no explicit close() is needed.
    with open(target, 'wb') as f:
        f.write(r.content)
    return num + 1
def DeatleFile(file_name):
    """Delete the directory *file_name* in the current working directory.

    Every entry directly inside the directory is removed with ``os.remove``
    and then the directory itself is removed. Nothing happens when the
    directory does not exist.

    Args:
        file_name: Name of the directory, relative to the current working
            directory.

    Raises:
        OSError: If an entry cannot be removed, e.g. because it is itself a
            directory (only plain files are handled).
    """
    path = os.path.join(os.getcwd(), file_name)
    # Test for the exact directory instead of substring-matching every entry
    # of the working directory, which also fired on names such as
    # "photo_notes.txt" and then failed on a directory that does not exist.
    if not os.path.isdir(path):
        return
    for entry in os.listdir(path):
        os.remove(os.path.join(path, entry))
    # os.rmdir removes only this directory; os.removedirs would additionally
    # prune every parent directory that became empty.
    os.rmdir(path)
def CreatFile(file_nam):
    """Create the directory *file_nam* inside the current working directory.

    The path is built with ``os.path.join`` so it works on every platform and
    matches the location the downloader writes to. An already existing
    directory is accepted silently.

    Args:
        file_nam: Name of the directory to create, relative to the current
            working directory.
    """
    os.makedirs(os.path.join(os.getcwd(), file_nam), exist_ok=True)
def main():
    """Crawl the logopond.com featured gallery and download its images.

    The ``photo`` directory in the current working directory is deleted and
    recreated, then the gallery list pages are requested one after another
    (page 1, 2, 3, ...). Every image URL matched on a page is handed to
    ``DownPage``, which stores it under a running sequence number.

    The crawl stops after several consecutive pages that either could not be
    fetched or contained no image matches, so it no longer loops forever once
    the gallery is exhausted or the site becomes unreachable.
    """
    site = 'https://logopond.com'
    list_url = site + '/gallery/list/?gallery=featured&filter=&month=&year=&page='
    # Loop-invariant, so compiled once rather than on every page.
    pattern = re.compile('<a href.*?img src="(.*?)".*?./a>', re.S)
    # Consecutive failed/empty pages tolerated before the crawl gives up.
    max_misses = 3

    file_nam = 'photo'
    DeatleFile(file_nam)
    CreatFile(file_nam)

    page = 1
    num = 0
    misses = 0
    while misses < max_misses:
        url = list_url + str(page)
        html = get_one_page(url)
        print(url)
        page += 1

        # get_one_page returns None on failure; treat that like an empty page.
        items = pattern.findall(html) if html is not None else []
        if not items:
            misses += 1
            continue
        misses = 0

        for photo_html in items:
            # Only sources that do not already contain the site address are
            # downloaded (after being prefixed with it); the others are
            # skipped. NOTE(review): presumably this filters out non-logo
            # images -- confirm the intent.
            if site in photo_html:
                continue
            next_num = DownPage(file_nam, site + photo_html, num)
            # DownPage returns None on failure; keep the counter unchanged then.
            if next_num is not None:
                num = next_num
# Start the crawl only when this file is executed as a script, not on import.
if "__main__" == __name__:
    main()