# Scrape user avatar images from Qiushibaike (糗事百科).
# The key to scraping the images: building a regular expression that matches the avatar URLs.
# pattern = '<img src="//([^\s:;]+\.(\w|/)*(.jpg|.JPEG)?\?imageView2/1/w/90/h/90)"'
import re
import urllib.request
def getimg(url,page):
# 设置头文件,模拟成浏览器爬取网页
headers = {
'Connection':'keep-alive',
'Accept-Language':'zh-CN,zh;q=0.9',
'Accept':'text/html,application/xhtml+xml,application/xml;\
q=0.9,image/webp,image/apng,*/*;q=0.8',
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 \
(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
}
headall = []
for key,value in headers.items():
items = (key,value)
headall.append(items)
print(headall) # 测试点1:输出头文件
# 设置 opener 对象
opener = urllib.request.build_opener()
opener.addheaders = headall
# 将opener对象设置成全局模式
urllib.request.install_opener(opener