# -*- coding: utf-8 -*-
"""Minimal crawler: save every ``.jpg`` linked from the Firefox China home page.

Images are written to the current working directory as ``1.jpg``, ``2.jpg``, ...
"""
import re
import urllib.error
import urllib.request

try:
    # Third-party and not used by the current code path; kept importable for
    # compatibility, but its absence must not stop the script from running.
    import chardet  # noqa: F401
except ImportError:
    chardet = None

# Some sites reject urllib's default User-Agent, so a browser-like one is sent.
_USER_AGENT = (
    'Mozilla/5.0 (iPod; U; CPU like Mac OS X; en) '
    'AppleWebKit/420.1 (KHTML, like Gecko) '
    'Version/3.0 Mobile/3A101a Safari/419.3'
)

# Non-greedy and bounded by quotes/brackets/whitespace so that one match can
# never run across several URLs that merely sit on the same line.
_IMG_URL_RE = re.compile(r"""https://[^\s"'<>]+?\.jpg""")


def _fetch_bytes(url):
    """Return ``(body_bytes, declared_charset_or_None)`` for *url*."""
    req = urllib.request.Request(url)
    req.add_header('User-Agent', _USER_AGENT)
    # The context manager closes the connection even if read() raises.
    with urllib.request.urlopen(req) as response:
        return response.read(), response.headers.get_content_charset()


def _find_image_urls(htmls):
    """Return the distinct ``https://...jpg`` URLs in *htmls*, in page order."""
    if not htmls:
        return []
    # dict.fromkeys removes duplicates while preserving first-seen order.
    return list(dict.fromkeys(_IMG_URL_RE.findall(str(htmls))))


def getHtml(url):
    """Download *url* and return its body decoded to ``str``.

    The charset declared in the response headers is used when present;
    otherwise the body is decoded as UTF-8.

    Raises:
        urllib.error.URLError: the request failed.
        UnicodeDecodeError: the body is not valid in the chosen charset.
    """
    body, charset = _fetch_bytes(url)
    return body.decode(charset or "utf-8")


def getImage(htmls):
    """Download every ``.jpg`` URL found in *htmls* into the working directory.

    Files are named ``1.jpg``, ``2.jpg``, ... in the order the images are
    successfully downloaded. An image whose download fails is reported and
    skipped so that one bad link does not abort the remaining downloads.

    Args:
        htmls: page source to scan; ``None`` or an empty string is a no-op.

    Returns:
        None.
    """
    saved = 0
    for img_url in _find_image_urls(htmls):
        try:
            # Images are binary: fetch raw bytes, never decode them as text.
            data, _ = _fetch_bytes(img_url)
        except urllib.error.URLError as err:
            print("skipping %s: %s" % (img_url, err))
            continue
        saved += 1
        print("正在下载第%s张图片" % saved)
        # 'wb' is required; text mode would reject bytes and corrupt the file.
        with open(str(saved) + '.jpg', 'wb') as fp:
            fp.write(data)
    return


url = "http://home.firefoxchina.cn/"

if __name__ == "__main__":
    # Only hit the network when executed as a script, not on import.
    html = getHtml(url)
    getImage(html)
简单的爬虫：爬取火狐主页的图片并保存到本地
最新推荐文章于 2024-03-13 14:28:07 发布