爬虫--python3实现网页爬虫下载图片

最新推荐文章于 2020-02-19 20:30:49 发布

原创 · 最新推荐文章于 2020-02-19 20:30:49 发布 · 232 阅读

CC 4.0 BY-SA版权

import re
import urllib.request

def getHtml(url):
    """Fetch *url* and return the raw, undecoded response body.

    Args:
        url: Address to open; handed unchanged to
            ``urllib.request.urlopen``.

    Returns:
        The complete response body as ``bytes``. Decoding is left to the
        caller.

    Raises:
        Nothing is caught here: any exception raised while opening the URL
        or reading the body propagates to the caller.
    """
    # The context manager closes the response (and its underlying
    # connection) even when read() raises, instead of leaving it open
    # until garbage collection.
    with urllib.request.urlopen(url) as page:
        return page.read()
# Fetch https://book.douban.com and decode the response bytes as UTF-8 text.
html = getHtml("https://book.douban.com").decode('UTF-8')
def getImg(html):
    """Collect the ``.jpg`` URLs found in double-quoted ``src`` attributes.

    Args:
        html: Decoded page markup (``str``) to scan.

    Returns:
        A list with the captured ``src`` values in the order they are
        found; an empty list when nothing matches.
    """
    # The attribute value is matched with [^"\s]* so a capture can never run
    # past its closing quote. The earlier class [.*\S] was equivalent to \S
    # and, being greedy, could swallow a `"` and glue neighbouring attributes
    # together (e.g. src="a.png"data-big="b.jpg").
    reg = r'src="([^"\s]*\.jpg)"'
    return re.findall(reg, html)

# Download every image URL extracted from the page into the spider directory.
# Files are named 0.jpg, 1.jpg, ... after the URL's position in the list; the
# counter advances on failures too, so numbers keep matching list positions.
imlist = getImg(html)
imgName = 0
for imgPath in imlist:
    try:
        # Download first, then open the target: a failed fetch no longer
        # leaves an empty file behind, and both the response and the file
        # are closed by their `with` blocks even when an error occurs.
        with urllib.request.urlopen(imgPath) as resp:
            data = resp.read()
        with open('/Users/ln/Desktop/spider/'+str(imgName)+".jpg", 'wb') as f:
            f.write(data)
        print(imgPath)
    except Exception as e:
        # Best-effort download: report the failing URL together with the
        # cause and continue with the next image.
        print(imgPath+'error', e)
    imgName += 1
print('all done')