Python 爬取图片

import urllib.request
import re #写正则表达式要导入的
import os
import time
# Script: fetch one Tmall item page, collect the thumbnail image references
# found in its HTML, and download the 430x430 variant of each image into a
# local folder.

# Request header that makes the request look like it comes from a Chrome browser.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36'}
data1 = urllib.request.Request('https://detail.tmall.com/item.htm?spm=a1z10.5-b-s.w4011-22444605914.104.2a4a6736MQEHUm&id=599075050709&rn=19296cbee3fdc0c2a8dafdebd167a3fe&abbucket=19&sku_properties=20509:28383', headers=headers)

# Send a GET request to the target URL. The context manager closes the
# response even when reading raises, so the connection is never leaked.
with urllib.request.urlopen(data1) as ur:
    content = ur.read()
# Undecodable bytes are dropped rather than aborting the run.
html = content.decode("utf-8", errors='ignore')
print(content)

# Example of the thumbnail markup this script was written against:
# <img src="//img.alicdn.com/imgextra/i4/360512874/TB2fcY5lBUSMeJjy1zjXXc0dXXa_!!360512874.jpg_60x60q90.jpg"
# The capture group is the part of the src between the "imgextra/https"
# prefix and the "_60x60q90.jpg" thumbnail-size suffix.
p = r'<img src="//img.alicdn.com/imgextra/https(.*?)_60x60q90.jpg" /></a>'
pattern = re.compile(p)

chapter_photo_list = re.findall(pattern, html)
print(chapter_photo_list)

# Folder the images are written to. urlretrieve does not create missing
# directories, so make sure it exists before the first download (and only
# when there is actually something to download).
save_dir = 'D://BeautifulPicture/'
if chapter_photo_list:
    os.makedirs(save_dir, exist_ok=True)

# Compiled once, outside the loop; extracts the path portion used to build
# the local file name.
name_pattern = re.compile(r'://img.alicdn.com/(.*?).jpg')

for chapter_photo in chapter_photo_list:
    print(chapter_photo)
    # Rebuild the image URL with the 430x430 size suffix in place of the
    # 60x60 thumbnail suffix that the scraping pattern stripped.
    url = "http://img.alicdn.com/imgextra/https" + chapter_photo + "_430x430q90.jpg"
    name_matches = name_pattern.findall(chapter_photo)
    if not name_matches:
        # One entry without the expected URL shape should not abort the
        # remaining downloads.
        print("skipping, cannot derive a file name from: " + chapter_photo)
        continue
    # Drop the first 30 characters of the captured path.
    # NOTE(review): presumably this strips the directory prefix so that no
    # '/' is left in the file name - confirm against real page data.
    name = name_matches[0][30:]
    urllib.request.urlretrieve(url, save_dir + name + ".jpg")
    # Pause 5 seconds after each download.
    time.sleep(5)

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值