"""Download every ``.jpg`` image referenced by the Douban Books front page."""

import re
import urllib.request

# Page that is scanned for images, and the directory the images are saved to.
# The directory must already exist; a missing directory is reported per image.
START_URL = "https://book.douban.com"
SAVE_DIR = '/Users/ln/Desktop/spider/'

# Captures the value of a src="..." attribute ending in ".jpg".
# Quotes and whitespace are excluded from the value so a match can never run
# past the closing quote into a neighbouring attribute.
_IMG_SRC_RE = re.compile(r'src="([^"\s]*\.jpg)"')


def getHtml(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to request; passed straight to ``urllib.request.urlopen``.

    Returns:
        The undecoded body as ``bytes``.

    Raises:
        Whatever ``urlopen`` / ``read`` raise (e.g. ``urllib.error.URLError``);
        nothing is caught here.
    """
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(url) as page:
        return page.read()


def getImg(html):
    """Return the ``.jpg`` URLs found in ``src="..."`` attributes of *html*.

    Args:
        html: Decoded page source (``str``).

    Returns:
        A list of the captured attribute values, in document order. The list
        is empty when nothing matches.
    """
    return _IMG_SRC_RE.findall(html)


def main():
    """Save each image found on ``START_URL`` as ``<index>.jpg`` in ``SAVE_DIR``.

    Downloading is best-effort: a failure on one image is reported and the
    loop moves on to the next one.
    """
    html = getHtml(START_URL).decode('UTF-8')

    # enumerate() advances the file index for every image, failed or not, so
    # a successful download can never overwrite an earlier one.
    for img_index, img_path in enumerate(getImg(html)):
        try:
            # Download before opening the target so a failed request does not
            # leave an empty file behind.
            data = getHtml(img_path)
            with open(SAVE_DIR + str(img_index) + ".jpg", 'wb') as out_file:
                out_file.write(data)
            print(img_path)
        except Exception as exc:
            # Top-level best-effort boundary: keep going, but show the cause.
            print(img_path + 'error', exc)

    print('all done')


if __name__ == "__main__":
    main()
爬虫--python3实现网页爬虫下载图片
最新推荐文章于 2020-02-19 20:30:49 发布
6030

被折叠的 条评论
为什么被折叠?



