Scraping images from JD book list pages


import os
import re
import urllib.request
from time import sleep

url='https://list.jd.com/list.html?cat=1713,3287,3797'
# A product <img> tag on the list page looks like this:
'''
<img width="200" height="200" data-img="1" data-lazy-img="done" title="" src="//img13.360buyimg.com/n7/jfs/t2725/230/173868818/289288/7ba63f78/5706731bNe3c6623d.jpg">
'''

def craw(url, page):
    html = urllib.request.urlopen(url).read().decode()

    # Grab every 200x200 product <img> tag (non-greedy so each match
    # stops at the end of its own tag instead of swallowing several tags).
    pat = '<img width="200" height="200" .*?>'
    result = re.compile(pat).findall(html)

    # Lazy-loaded images keep the real URL in the data-lazy-img attribute.
    pat1 = r'data-lazy-img="//(.+?\.jpg)">'
    img = re.compile(pat1).findall(str(result))

    k = 0
    for i in img:
        k += 1
        imgname = "C:/Users/os/Desktop/img/" + str(page) + '/' + str(k) + ".jpg"
        urllib.request.urlretrieve("http://" + i, filename=imgname)
        sleep(1)
        print(k)

    # Images the browser has already loaded carry the URL in src instead.
    pat2 = r'src="//(.+?\.jpg)">'
    img2 = re.compile(pat2).findall(str(result))

    for i in img2:
        k += 1  # keep numbering after the first batch so filenames don't collide
        imgname = "C:/Users/os/Desktop/img/" + str(page) + '/' + str(k) + ".jpg"
        urllib.request.urlretrieve("http://" + i, filename=imgname)
        sleep(1)
        print(k)






if __name__ == '__main__':
    for i in range(3, 100):
        # one folder per list page; exist_ok avoids a crash if it already exists
        os.makedirs('C:/Users/os/Desktop/img/' + str(i), exist_ok=True)
        url = 'https://list.jd.com/list.html?cat=1713,3287,3797&page=' + str(i)
        print(url)
        craw(url, i)
        sleep(1)
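
The two patterns can be sanity-checked against the tag quoted in the docstring before letting the crawler loose on the live site. This is a minimal sketch: the first sample tag is the one from the docstring (already lazy-loaded, so the URL sits in src), while the second tag is a made-up example of an image that has not been lazy-loaded yet, so the URL is still in data-lazy-img.

import re

# First tag: copied from the docstring above; second tag: hypothetical,
# not yet lazy-loaded, so the real URL is still in data-lazy-img.
samples = '''
<img width="200" height="200" data-img="1" data-lazy-img="done" title="" src="//img13.360buyimg.com/n7/jfs/t2725/230/173868818/289288/7ba63f78/5706731bNe3c6623d.jpg">
<img width="200" height="200" data-img="1" data-lazy-img="//img10.360buyimg.com/n7/jfs/t1/example.jpg">
'''

pat_lazy = r'data-lazy-img="//(.+?\.jpg)">'   # same pattern as pat1 in craw()
pat_src = r'src="//(.+?\.jpg)">'              # same pattern as pat2 in craw()

print(re.compile(pat_lazy).findall(samples))  # -> the not-yet-loaded image URL
print(re.compile(pat_src).findall(samples))   # -> the already-loaded image URL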