# -*- coding: UTF-8 -*-
import urllib2,urllib,os
'''
Created on 2017年4月16日
@author: 恋蝶
'''
# Scrape the Taobao "mm" ranking pages and download every photo found on each
# listed user's profile page into one directory per user.
#
# The pages are cut apart with plain substring searches (str.find) on fixed
# markers; no HTML parser is involved.  The script targets Python 2
# (urllib2.urlopen, urllib.urlretrieve).
#
# NOTE(review): the block indentation was lost in this copy of the script.
# The nesting below is reconstructed from the data flow (each loop's cursor
# updates sit at the end of its own body).  Confirm against the author's
# version -- in particular whether the print belongs inside the `if`.

# Ranking-list endpoint; the page number is appended.
mmurl = "https://mm.taobao.com/json/request_top_list.htm?type=0&page="
# Profile-page endpoint; the user id is appended.
temp = "https://mm.taobao.com/self/aiShow.htm?spm=719.7763510.1998643336.1.pI7wQq&userId="
# Marker that precedes the image address inside an image tag.
T = '''src="'''
# Root output directory (Windows path, trailing separator included).
wjming = "E:\\Pythonpaqu\\Python\\picimg\\"
ph = -1  # not referenced anywhere in the visible part of the script

# Markers used on the ranking page.  Hoisted: they never change per iteration.
ahref = "user_id="
target = '''target="_blank"'''
mta = '''target="_blank">'''
mtb = '''</a>'''
# Markers used on the profile page.
imgsty = '''img style="float:'''
tstar = "tstar.jpg"

for i in range(16):
    url = mmurl + str(i)
    up = urllib2.urlopen(url)
    try:
        cont = up.read()
    finally:
        # Release the connection even if the read fails.
        up.close()

    h = 1  # running number of the profile within this ranking page
    pa = cont.find(ahref)
    pt = cont.find(target, pa)
    # Stop when either marker is missing: a -1 end index would otherwise be
    # used as a slice bound and yield a garbage user id.
    while pa >= 0 and pt >= 0:
        # The id sits between "user_id=" and two characters before the
        # target marker (presumably a closing quote and a space -- verify
        # against the live markup).
        userid = cont[pa + len(ahref): pt - 2]
        grzxurl = temp + userid
        mmup = urllib2.urlopen(grzxurl)
        try:
            imgcont = mmup.read()
        finally:
            mmup.close()

        # Fixed-size window after the id in which the link text is searched
        # for: everything between 'target="_blank">' and '</a>'.
        mtname = cont[pa + len(ahref): pt + 36]
        mtapa = mtname.find(mta)
        mtapb = mtname.find(mtb, mtapa)
        wjname = mtname[mtapa + len(mta): mtapb]
        # Page number prefix and per-page counter suffix are added to the
        # link text to form the directory name.
        wjname = str(i) + wjname + str(h)
        h += 1

        zzwjm = wjming + wjname
        if not os.path.exists(zzwjm):
            os.makedirs(zzwjm)
        print(zzwjm)

        l = 1  # running number of the image within this profile
        imgpa = imgcont.find(imgsty)
        imgpt = imgcont.find(tstar, imgpa)
        while imgpa >= 0 and imgpt >= 0:
            # Tag fragment from just after the style marker through the end
            # of "tstar.jpg".
            whileimg = imgcont[imgpa + len(imgsty): imgpt + len(tstar)]

            # Advance past this image before anything below can skip it, so
            # the loop always makes progress.
            imgcont = imgcont[imgpt:]
            imgpa = imgcont.find(imgsty)
            imgpt = imgcont.find(tstar, imgpa)

            temurl = whileimg.find(T)
            if temurl < 0:
                # No src attribute in this fragment: nothing to download.
                continue
            # The page stores the address without a scheme, so one is added.
            imgurl = "http:" + whileimg[temurl + len(T):]
            urllib.urlretrieve(imgurl, zzwjm + "\\tu" + str(l) + ".jpg")
            l += 1

        # Drop everything up to the current target marker and look for the
        # next profile link.
        cont = cont[pt:]
        pa = cont.find(ahref)
        pt = cont.find(target, pa)
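# python爬虫技术实现图片提取 (Image extraction implemented with a Python web crawler)
# 最新推荐文章于 2024-03-12 15:25:27 发布 (web-page footer: latest recommended article published 2024-03-12 15:25:27)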