#!/usr/bin/python
# coding:utf-8
"""Minimal image crawler.

Fetches one web page, extracts the ``src`` of every ``<img>`` tag that points
at a ``.jpg`` file and downloads those images into a local directory.

The module runs on both Python 2 and Python 3.
"""
import os
import re
import urllib  # retained from the original script; the helpers below are imported by name
from contextlib import closing

try:  # Python 3
    from urllib.parse import urljoin
    from urllib.request import urlopen, urlretrieve
except ImportError:  # Python 2
    from urllib import urlopen, urlretrieve
    from urlparse import urljoin


# Crawler class
class GetImages(object):
    """Download the JPEG images referenced by the ``<img>`` tags of a page."""

    # Number of images downloaded so far. The class-level value is only a
    # default; every instance gets its own counter in ``__init__``.
    count = 0

    # ``[^>]`` keeps the match inside a single tag and ``[^"]`` inside a single
    # quoted attribute value, so a non-jpg image followed by a jpg image is not
    # captured as one bogus URL.
    _JPG_SRC_RE = re.compile(r'<img[^>]+?src="([^"]+?\.jpg)"')

    def __init__(self):
        self.count = 0

    # Read the HTML
    def getHtmlContent(self, url):
        """Return the body of ``url`` decoded to text.

        The charset announced by the response headers is used when available;
        otherwise (or when the announced charset is unknown to Python) UTF-8 is
        assumed. Undecodable bytes are replaced rather than raising.
        """
        # ``closing`` guarantees the connection is released on both Python 2
        # (where the response is not a context manager) and Python 3.
        with closing(urlopen(url)) as page:
            raw = page.read()
            headers = getattr(page, 'headers', None)

        # Only Python 3 header objects offer ``get_content_charset``.
        get_charset = getattr(headers, 'get_content_charset', None)
        charset = get_charset() if get_charset else None
        try:
            return raw.decode(charset or 'utf-8', 'replace')
        except LookupError:
            return raw.decode('utf-8', 'replace')

    # Extract the images
    def getPicture(self, page):
        """Return the list of ``.jpg`` URLs found in ``<img src="...">`` tags.

        ``page`` may be text or raw bytes; bytes are decoded as UTF-8 first.
        URLs are returned exactly as written in the page, in document order.
        """
        if isinstance(page, bytes):
            page = page.decode('utf-8', 'replace')
        return self._JPG_SRC_RE.findall(page)

    # Download
    def batchDownloadJPGs(self, jpgs, path='./'):
        """Download every URL in ``jpgs`` into the directory ``path``.

        Files are named ``1.jpg``, ``2.jpg``, ... in the order given (numbering
        restarts at 1 on every call). ``path`` is created when it does not
        exist. ``self.count`` is incremented after each successful download;
        a failing download propagates its exception.
        """
        if path and not os.path.isdir(path):
            os.makedirs(path)
        for num, jpg_url in enumerate(jpgs, 1):
            # ``os.path.join`` copes with ``path`` lacking a trailing slash.
            urlretrieve(jpg_url, os.path.join(path, '{0}.jpg'.format(num)))
            self.count += 1

    # Start crawling
    def run(self, url):
        """Fetch ``url``, download all of its ``.jpg`` images into the current
        directory and print the running total of downloaded images."""
        page = self.getHtmlContent(url)
        # Resolve relative and scheme-relative ``src`` values against the page
        # URL; absolute URLs pass through ``urljoin`` unchanged.
        jpgs = [urljoin(url, src) for src in self.getPicture(page)]
        self.batchDownloadJPGs(jpgs)
        print('成功添加 {0} 条张图片'.format(self.count))


# Page to scrape for images
url = 'http://images.baidu.com/search/detail?z=0&word=%E6%91%84%E5%BD%B1%E5%B8%88%E5%88%98%E6%9C%8Bpeter&hs=0&pn=5&spn=0&di=0&pi=57629150605&tn=baiduimagedetail&is=0%2C0&ie=utf-8&oe=utf-8&cs=974474859%2C779207686&os=&simid=&adpicid=0&lpn=0&fm=&sme=&cg=&bdtype=-1&oriquery=&objurl=http%3A%2F%2Ff.hiphotos.baidu.com%2Fimage%2Fpic%2Fitem%2F9c16fdfaaf51f3de39827b3e99eef01f3a2979bc.jpg&fromurl=&gsm=0&catename=pcindexhot&islist=&querylist='

# Run the crawler only when executed as a script, so that importing this
# module does not trigger network access.
if __name__ == '__main__':
    obj = GetImages()
    obj.run(url)
# Reposted from: https://my.oschina.net/lianglc/blog/2878878