# -*- coding:utf-8 -*-
import urllib
import urllib2
import re
import os
import random
# Directory that downloaded images are written into.
picPath = "D:/testSpider"
# Number of images saved so far; saveImg also uses it as the next file's name.
picCount = 0
# Create a new directory.
def mkdir(path):
    """Create the directory ``path`` (and any missing parents) if needed.

    Leading and trailing whitespace is stripped from ``path`` first.

    Returns:
        True if the directory was created by this call, False if it
        already existed (a notice is printed in that case).

    Raises:
        OSError: if the directory cannot be created, including the case
            where ``path`` exists but is not a directory.
    """
    path = path.strip()
    try:
        # Attempt the creation directly rather than testing
        # os.path.exists() first, so there is no gap between the check
        # and the creation in which the directory could appear.
        os.makedirs(path)
    except OSError:
        # Only an already-existing directory is the benign case; anything
        # else (permissions, a regular file in the way, ...) is a real error.
        if not os.path.isdir(path):
            raise
        print(u"名为 %s 的文件夹已经创建成功" % path)
        return False
    return True
def saveImg(imageURL):
    """Download ``imageURL`` and store it as ``<picPath>/<picCount>.jpg``.

    The module-level counter ``picCount`` supplies the file name and is
    incremented after the image has been written, so consecutive calls
    produce 0.jpg, 1.jpg, ...

    Args:
        imageURL: URL of the image to fetch.
    """
    global picCount
    response = urllib.urlopen(imageURL)
    print(imageURL)
    try:
        data = response.read()
    finally:
        # Release the connection even when the read fails.
        response.close()
    fileName = picPath + "/%d.jpg" % (picCount)
    # The with-block closes the file even if the write raises.
    with open(fileName, 'wb') as f:
        f.write(data)
    picCount = picCount + 1
    print(u"图片 %d %s" % (picCount, fileName))
#saveImg("http://touxiang.yeree.com/pics/4c/227088.jpg")
#http://touxiang.yeree.com/m/tx/26623/list2.html
# Request headers carrying a desktop Chrome User-Agent string.
# NOTE(review): not referenced by any code visible in this file.
gHeads = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/55.0.2883.87 Safari/537.36"
    ),
}
def downloadOneClass(url_d, nNum=1):
    """Download every avatar image linked from one category's list pages.

    Args:
        url_d: URL template with a single ``%d`` placeholder that receives
            the list page number.
        nNum: how many list pages to fetch. Page numbers are ``i + 2`` for
            ``i`` in ``range(nNum)``, so the first page requested is 2.
            Defaults to 1.

    A page that cannot be fetched (``urllib2.URLError``, which includes HTTP
    error statuses) is reported and skipped so one bad page does not abort
    the caller. Every image URL found on a page is passed to ``saveImg``.
    """
    # Compiled once, outside the loop. Raw string with escaped dots so that
    # "." matches only a literal dot (e.g. "123xjpg" is no longer accepted).
    pattern = re.compile(
        r'http://touxiang\.yeree\.com/pics/[a-zA-Z0-9]+/\d+\.jpg')
    headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
    for i in range(nNum):
        url = url_d % (i + 2)
        print(url)
        # No request body is supplied: urllib2 issues a POST whenever data
        # is not None (even an empty string), and a plain page fetch
        # should be a GET.
        request = urllib2.Request(url, headers=headers)
        try:
            response = urllib2.urlopen(request)
            try:
                page = response.read()
            finally:
                # Release the connection even when the read fails.
                response.close()
        except urllib2.URLError as e:
            print("failed to fetch %s: %s" % (url, e))
            continue
        for item in pattern.findall(page):
            saveImg(item)
def main():
    """Crawl randomly chosen categories until enough images are saved.

    Visits up to 1000 distinct category ids drawn from 1..20000, downloads
    each category through ``downloadOneClass``, and stops early once the
    module-level ``picCount`` has reached 1000.
    """
    # random.sample draws integer ids without replacement, so no category
    # is requested twice and no separate "already seen" bookkeeping is needed.
    for classId in random.sample(range(1, 20001), 1000):
        url_d = "http://touxiang.yeree.com/m/tx/%d" % (classId) + "/list%d.html"
        downloadOneClass(url_d)
        # Checked after each category; >= so the crawl stops as soon as the
        # 1000th image has been saved, matching the message below.
        if picCount >= 1000:
            print("1000 get")
            return
# Make sure the output directory exists before any image is written into it.
mkdir(picPath)
# Start the crawl; this runs at module top level, i.e. whenever the file is
# executed or imported.
main()
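# After the images have been scraped, Photoshop's batch processing can be used
# to convert the JPG files into 132*132 PNG images for use as WeChat avatars.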