requests库详解

requests库是在urllib3的基础上封装的HTTP库,使用起来比标准库urllib更加简洁易用。进行爬虫时我们一般更多地会使用requests库。

安装requests

Python3

pip3 install requests

Python2

pip install requests
请求方式

有get、post、put、delete、head、options几种请求方式,常用的就是get和post请求

import requests

requests.get('http://httpbin.org/get')
requests.post('http://httpbin.org/post')
requests.put('http://httpbin.org/put')
requests.delete('http://httpbin.org/delete')
requests.head('http://httpbin.org/get')
requests.options('http://httpbin.org/get')
Get请求
基本用法
import requests

req = requests.get('http://httpbin.org/get')
print(req.text) #以文本形式输出
带参数的
import requests
#方法一
req = requests.get('http://httpbin.org/get?name=**&age=**')
print(req.text)

#方法二
params = {
    'name': 'albert',
    'age': 1
}
req = requests.get('http://httpbin.org/get', params = params)
print(req.text)
添加headers
import requests

headers = {
    'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
}
req = requests.get('http://httpbin.org/get', headers = headers)
print(req.text)
Post请求
基本用法
import requests

data = {'name' : 'albert', 'age' : 1}
req = requests.post('http://httpbin.org/post', data = data)
print(req.text)
添加headers
import requests

data = {'name' : 'albert', 'age' : 1}
headers = {
    'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
}
req = requests.post('http://httpbin.org/post', data = data, headers = headers)
print(req.text)
获取二进制数据(图片、视频等)并保存
import requests

req = requests.get('https://avatar.youkuaiyun.com/9/5/4/1_a564126786.jpg?1546960150')
print(req.content) #以bytes形式输出
with open('img.jpg', 'wb') as f:
    f.write(req.content)
JSON响应内容
import requests

req = requests.get('http://httpbin.org/get')
print(req.json())  #将响应内容按JSON解析为Python对象,相当于使用json库的json.loads(req.text)

若是JSON解码失败,会抛出异常;但是没有抛出异常不表示响应成功。服务器可能会在失败响应中包含JSON对象,这种JSON会被解码返回,要检查请求是否成功,使用req.raise_for_status()或者req.status_code来判断

import requests

try:
	req = requests.get('http://httpbin.org/get')
	req.raise_for_status() #若状态码为4xx或5xx,则抛出HTTPError异常
except requests.RequestException as e:
	print('error:', e)
else:
	print(req.json())
响应属性
import requests

req = requests.get('http://www.baidu.com')
print(type(req)) #<class 'requests.models.Response'>
print(req.status_code) #200
print(req.headers) #响应头信息
print(req.headers.get('xxx')) #获取某个响应头信息;也可以写成req.headers['xxx'],但该响应头不存在时会抛出KeyError
print(req.cookies) #<RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>
print(req.url) #http://www.baidu.com/
print(req.history) #[] 请求历史,以list形式输出
文件上传
import requests

files = {'file' : open('img.jpg', 'rb')}
#files = {'file' : ('img.jpg', open('img.jpg', 'rb'), 'image/jpeg', {'Expires': '0'})} #设置文件名,文件类型和请求头
req = requests.post('http://httpbin.org/post', files = files)

print(req.text)
获取cookie
import requests

req = requests.get('http://www.baidu.com')
print(req.cookies)
for k, v in req.cookies.items():
    print(k + '=' + v)
会话维持(一般用于保存登录状态)
import requests 

s = requests.session()
s.get('http://httpbin.org/cookies/set/number/123456')
req = s.get('http://httpbin.org/cookies')
print(req.text)
证书验证
import requests
from requests.packages import urllib3

urllib3.disable_warnings() #可以消除取消证书验证后出现的警告信息
req = requests.get('https://www.12306.cn', verify = False) #verify表示是否进行证书验证
print(req.status_code) #200
代理设置
import requests

proxies = {
    'http': 'http://user:password@127.0.0.1:1087',
    'https': 'https://user:password@127.0.0.1:1087'
}
req = requests.get('https://www.taobao.com', proxies = proxies)
print(req.text)
认证设置

有些网址需要进行身份认证,就要进行认证设置

import requests
from requests.auth import HTTPBasicAuth

req = requests.get('http://httpbin.org/hidden-basic-auth/user/passwd', auth=HTTPBasicAuth('user', 'passwd'))
#req = requests.get('http://httpbin.org/hidden-basic-auth/user/passwd', auth = ('user' , 'passwd')) #简写
异常处理

requests抛出的异常都继承自requests.exceptions.RequestException,包括ConnectionError、HTTPError、Timeout、TooManyRedirects

import requests
from requests.exceptions import Timeout, HTTPError, ConnectionError, TooManyRedirects, RequestException

try:
	req = requests.get('http://www.baidu.com', timeout = 0.01)
	req.raise_for_status()

except Timeout as e: #请求超时
	print('Timeout:', e)
except HTTPError as e: #状态码为4xx或5xx
	print('HTTPError:', e)
except ConnectionError as e: #DNS查询失败、拒绝连接等
	print('ConnectionError:', e)
except TooManyRedirects as e: #超过设定的最大重定向次数
	print('TooManyRedirects:', e)
except RequestException as e: 
	print('RequestException:', e)
else:
	print(req.text) #百度首页返回的是HTML而不是JSON,调用req.json()会解码失败,这里用req.text输出
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值