import urllib.request
import re #写正则表达式要导入的
import os
import time
# Fetch the product page. A desktop-Chrome User-Agent header is sent with the
# request so that it looks like it came from a browser.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36'}
data1 = urllib.request.Request('https://detail.tmall.com/item.htm?spm=a1z10.5-b-s.w4011-22444605914.104.2a4a6736MQEHUm&id=599075050709&rn=19296cbee3fdc0c2a8dafdebd167a3fe&abbucket=19&sku_properties=20509:28383', headers=headers)
# Send a GET request to the target URL. The context manager closes the
# response even if reading the body raises.
with urllib.request.urlopen(data1) as ur:
    content = ur.read()
# Undecodable bytes are dropped rather than aborting the run.
html = content.decode("utf-8", errors='ignore')
print(content)
# Extract the thumbnail image paths from the downloaded page.
# Sample thumbnail markup recorded by the original author:
# <img src="//img.alicdn.com/imgextra/i4/360512874/TB2fcY5lBUSMeJjy1zjXXc0dXXa_!!360512874.jpg_60x60q90.jpg"
# NOTE(review): the sample above has no "https" after "imgextra/", while the
# pattern below requires it — confirm against the current page markup.
# The dots are escaped so they match a literal "." instead of any character.
p = r'<img src="//img\.alicdn\.com/imgextra/https(.*?)_60x60q90\.jpg" /></a>'
pattern = re.compile(p)
# Each entry is the text captured between "imgextra/https" and the
# "_60x60q90.jpg" thumbnail suffix.
chapter_photo_list = pattern.findall(html)
print(chapter_photo_list)
# Download every captured image into the local picture folder.
# urlretrieve does not create missing directories, so make sure the target
# folder exists first (no error if it is already there).
os.makedirs('D://BeautifulPicture/', exist_ok=True)
for chapter_photo in chapter_photo_list:
    print(chapter_photo)
    # Request the "_430x430q90" variant of the image instead of the
    # "_60x60q90" thumbnail.
    url = "http://img.alicdn.com/imgextra/https" + chapter_photo + "_430x430q90.jpg"
    # Derive the local file name from the part of the captured path that
    # follows the host and precedes the first ".jpg".
    name_matches = re.findall(r'://img\.alicdn\.com/(.*?)\.jpg', chapter_photo)
    if not name_matches:
        # Skip entries that do not have the expected shape instead of
        # crashing the whole run with an IndexError.
        print('skipping, cannot derive a file name from: ' + chapter_photo)
        continue
    # NOTE(review): the first 30 characters are dropped, presumably to strip
    # the leading directory components — confirm the offset against real data.
    name = name_matches[0][30:]
    urllib.request.urlretrieve(url, 'D://BeautifulPicture/' + name + ".jpg")
    # Pause between downloads.
    time.sleep(5)