import re
import os.path
import requests
class spilder:
    """Download every image referenced on one jandan.net picture page.

    The page number is held in ``self.page``. Images are written to
    ``images/<page>/`` relative to the current working directory.
    """

    # Captures the ``src`` attribute value of ``<img>`` tags.
    _IMG_SRC_RE = re.compile(r'<img src="(.*?)"')
    # Sent with the listing-page request only; image requests use defaults.
    _HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0;Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0'}
    # Seconds before an HTTP request is abandoned instead of hanging forever.
    _TIMEOUT = 10

    def __init__(self, page=None):
        """Store the page number to crawl.

        Args:
            page: Page number. Optional so that ``spilder()`` followed by an
                assignment to ``.page`` keeps working.
        """
        self.page = page

    # The constructor used to be misspelled ``_init_`` and was therefore never
    # invoked by Python; the old name stays available for explicit callers.
    _init_ = __init__

    def _page_dir(self):
        """Return the directory that holds the images of ``self.page``."""
        return os.path.join('images', str(self.page))

    @staticmethod
    def _image_name(url):
        """Return the file name (last path segment) of an image URL."""
        # Drop any query string / fragment so they do not end up in the name.
        path = url.split('?', 1)[0].split('#', 1)[0]
        return path.rstrip('/').rsplit('/', 1)[-1]

    def _image_path(self, url):
        """Return the local path under which the image at ``url`` is saved."""
        return os.path.join(self._page_dir(), self._image_name(url))

    @classmethod
    def _image_urls(cls, html):
        """Return all ``<img src>`` values found in ``html``, in order.

        Values that do not start with ``http`` get ``http:`` prepended
        (this turns ``//host/x.jpg`` into ``http://host/x.jpg``).
        """
        urls = []
        for src in cls._IMG_SRC_RE.findall(html):
            if not src.startswith('http'):
                src = 'http:' + src
            urls.append(src)
        return urls

    # Create the output folder.
    def CreateFile(self, page):
        """Create ``images/<self.page>/`` if it does not exist yet.

        Args:
            page: Ignored; the directory is derived from ``self.page``.
                Kept so existing callers continue to work.
        """
        # makedirs also creates the parent ``images`` directory and does not
        # fail when the directory is already present.
        os.makedirs(self._page_dir(), exist_ok=True)

    # Download the HTML.
    def DownloadUrl(self, page):
        """Fetch the listing page for ``self.page`` and process its images.

        Args:
            page: Ignored; ``self.page`` selects the page. Kept so existing
                callers continue to work.
        """
        self.CreateFile(self.page)
        url = 'http://jandan.net/pic/page-{}#comments'.format(self.page)
        response = requests.get(url, headers=self._HEADERS, timeout=self._TIMEOUT)
        html = response.content.decode('utf8')
        self.AnalysisImg(html)

    # Parse the HTML.
    def AnalysisImg(self, html):
        """Download every image referenced by an ``<img src>`` in ``html``."""
        for url in self._image_urls(html):
            self.DownloadImg(url, self.page)

    # Download one image.
    def DownloadImg(self, item, page):
        """Fetch the image at URL ``item`` and store it in the page folder.

        Args:
            item: Absolute image URL.
            page: Ignored; the target folder is derived from ``self.page``.
        """
        response = requests.get(item, timeout=self._TIMEOUT)
        self.StorageImg(self._image_path(item), response.content)

    # Store one image.
    def StorageImg(self, strs, imgs):
        """Write the bytes ``imgs`` to the file path ``strs``."""
        with open(strs, 'wb') as f:
            f.write(imgs)

    def begin(self):
        """Run the whole crawl for ``self.page``."""
        self.DownloadUrl(self.page)
# Script entry point: crawl page 102.
if __name__ == '__main__':
    # Use a distinct variable name: assigning to ``spilder`` would rebind the
    # class name to the instance and make the class unreachable afterwards.
    crawler = spilder()
    crawler.page = 102
    crawler.begin()