这是慕课网《Python开发简单爬虫》中网页下载器的三种实现方法,课程用的是Python 2.7,这里用最新的Python 3.4.4实现出来,分享给新人:
import urllib.request
from http.cookiejar import CookieJar
# Demonstrates three ways to download a page with urllib.request.
# Each response is opened in a with-block so it is closed once it has been
# read, even if getcode()/read() raises.
url = 'http://www.baidu.com'

# Approach 1: pass the URL string straight to urlopen().
print('第一种方法')
with urllib.request.urlopen(url) as res1:
    print(res1.getcode())  # status code; 200 means success
    print(len(res1.read()))

# Approach 2: build a Request carrying a user-agent header so the request
# presents itself as a Mozilla browser.
print('第二种方法')
request = urllib.request.Request(url, headers={'user-agent': 'Mozilla/5.0'})
with urllib.request.urlopen(request) as res2:
    print(res2.getcode())
    print(len(res2.read()))

# Approach 3: route requests through an opener that stores cookies in a
# CookieJar.
print('第三种方法')
cj = CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
# install_opener() makes this opener the default used by later urlopen() calls.
urllib.request.install_opener(opener)
with urllib.request.urlopen(url) as res3:
    print(res3.getcode())
    print(cj)  # cookies collected in the jar
    print(res3.read())  # page source as returned by read()
下载地址: http://download.youkuaiyun.com/detail/sunflowerduidui/9480365