Requests
- get请求
# GET request with query-string parameters
import requests

# Passed through params=, so requests encodes these into the URL query string
query = {'name': 'germey', 'age': 22}
response = requests.get('http://httpbin.org/get', params=query)

# Response body as text
print(response.text)

# json() parses the JSON body into a Python object (a dict for this endpoint)
parsed = response.json()
print(parsed)
print(type(parsed))  # dict
- 抓取网页
import re

import requests

# Send a browser User-Agent string along with the request
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36',
}
response = requests.get("https://www.zhihu.com/explore", headers=headers)

# re.S makes "." match newlines too, so a captured title may span lines
title_re = re.compile('data-za-detail-view-id="5799">(.*?)</a>', re.S)
titles = title_re.findall(response.text)
print(titles)
- 抓取二进制数据(图片,音频,视频)
# Download binary data (images, audio, video)
import requests

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36',
}
logo_url = "https://static.zhihu.com/heifetz/assets/logo.f6eef033.png"
response = requests.get(logo_url, headers=headers)

# Open the target in binary mode and write the raw response bytes to favicon.ico
with open('favicon.ico', 'wb') as out_file:
    out_file.write(response.content)

# .text is the body decoded to str; .content is the undecoded bytes
print(response.text)
print(response.content)
- post请求
通过 data 参数传递表单数据
import requests

# Form fields are handed to requests.post through the data argument
form_fields = dict(name='germey', age='22')
response = requests.post('http://httpbin.org/post', data=form_fields)
print(response.text)
- 内置状态码
# Built-in status codes
import requests

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'
}
r = requests.get('http://www.jianshu.com', headers=headers)

# requests.codes.ok is the library's built-in named status code (200)
if r.status_code != requests.codes.ok:
    # Stop the script on any other status. SystemExit is raised directly
    # because the exit() helper is only installed by the site module and is
    # meant for interactive use; the exit status is the same.
    raise SystemExit

print('requests successfully')
- 文件上传
# File upload
import requests

# Open the file in a with-block so the handle is closed once the upload
# has been sent, instead of being left open for the rest of the script.
with open('favicon.ico', 'rb') as upload_handle:
    # files is a dict mapping the form field name to the open binary file
    files = {'file': upload_handle}
    # Passing files= makes requests send the file in the POST body
    r = requests.post('http://httpbin.org/post', files=files)

print(r.text)
- cookies
# Send cookies to the site through a raw Cookie request header
# (original note: this request succeeded)
# NOTE(review): the cookie value below looks like a real logged-in session
# captured from a browser and hard-coded in source; confirm it is expired.
import requests

user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'
cookie_header = '_zap=0ed08b6f-0536-4fe7-af62-4f6d8ed6f4dd; _xsrf=F6Y90l9FURL5ECK8gpYlQGQps7Yuivfk; _ga=GA1.2.1860576024.1590395089; _gid=GA1.2.1869275334.1590395089; d_c0="AIASYHrPUhGPTpVkggw52w-Mh96pfWj1TjA=|1590395092"; l_n_c=1; n_c=1; tst=r; _gat_gtag_UA_149949619_1=1; SESSIONID=jkiAxmQ7ZDL2AVy0A6H2PpVApGT52r439qr1fwSUNry; JOID=V1kQAEhA5dksKo32OEZXTMWDIacpLoq2bVTkowkcpZEYa92LDAyGE3QojfQ-WedhwryiHnxwLk17_0_1haUlr6E=; osd=U1sUAUNE590tIYn0PEdcSMeHIKwtLI63ZlDmpwgXoZMcataPDgiHGHAqifU1XeVlw7emHHhxJUl5-07-gachrqo=; Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1590397983,1590398005,1590452552,1590452558; Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1590452558; capsion_ticket="2|1:0|10:1590452558|14:capsion_ticket|44:OGM4Y2Y5YjRlMmMzNDg1NDg1ZDAyZjJiZTVmYjMwMmQ=|417320164b89b182d1b748aa5bc038129659f288d652a60944b3f07e98bc578f"; r_cap_id="ZmE5MTRmMzAxYzE5NDJlNWJlYzEyZDU5NWZkMTRjYzI=|1590452574|34668f68864e61b885bc460d2c978ad111aaf3fe"; cap_id="YzM5OGE4Mzk4MGE1NDk3OWFlY2JiMzVlNDkyZTI5YTY=|1590452574|2fecbf69fe55407269237f025db14726b9c516b5"; l_cap_id="N2UyNTcwYjhmODA4NDlhMTk5YzAwZWRhMWRkZjY2NTQ=|1590452574|a21b1162ec4e63037e5057e0781b81f2a014afcf"; z_c0=Mi4xZklkdUVBQUFBQUFBZ0JKZ2VzOVNFUmNBQUFCaEFsVk5iSy01WHdDUXRYSUxHMUhBa2hyc0dOdzcxc2gtMGRQa2dn|1590452588|c0a7c1e8e1b382331ac9396de95eab32e664564c; KLBRSID=d6f775bb0765885473b0cba3a5fa9c12|1590452589|1590452548'

# Same three headers, in the same order, as a single dict
headers = {
    'User-Agent': user_agent,
    'Cookie': cookie_header,
    'Host': 'www.zhihu.com',
}

response = requests.get('https://www.zhihu.com', headers=headers)
print(response.text)
# Cookies
# Read the cookies carried by the response
import requests

response = requests.get("https://www.tianyancha.com")
jar = response.cookies
print(jar)

# items() returns the cookies as (name, value) tuples
for name, value in jar.items():
    print('='.join((name, value)))
- session
# Using a session to carry cookies between requests
import requests

# One Session object is reused for both requests below
session = requests.Session()

# First request through the session: sets a cookie
session.get('http://httpbin.org/cookies/set/number/123456789')

# Second GET through the same session
cookie_echo = session.get('http://httpbin.org/cookies')

# Per the original note, the cookies are retrieved successfully here
print(cookie_echo.text)

# A session can simulate opening different pages of the same site in one
# browser; it is typically used for the steps that follow a simulated login.
- SSL证书验证
# SSL certificate verification
import requests

target_url = 'https://www.12306.cn'
# verify=False switches certificate verification off for this request
resp = requests.get(target_url, verify=False)
print(resp.status_code)
- Prepared Request
from requests import Request, Session

url = 'http://httpbin.org/post'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36',
}
data = dict(name='germey')

# Session used to prepare and send the request
session = Session()

# Build the request with the Request constructor
post_request = Request('POST', url, headers=headers, data=data)

# The session's prepare_request() creates the prepared request object
prepared = session.prepare_request(post_request)

# send() transmits the prepared request and returns the response
response = session.send(prepared)
print(response.status_code)
出现 Max retries exceeded 报错时:
在 requests 请求中加上参数 verify=False,关闭 SSL 证书验证
# headers must be passed by keyword: the second positional parameter of
# requests.get is params, so a positional dict would be sent as query-string
# parameters instead of HTTP headers.
# verify=False disables certificate verification for this request.
requests.get(url, headers=headers, verify=False)
加上 verify=False 之后仍会出现 InsecureRequestWarning 警告信息,可在发送请求之前加上以下代码来关闭警告:
import urllib3

# Silence urllib3's warnings before sending the unverified request
urllib3.disable_warnings()

# headers must be passed by keyword: the second positional parameter of
# requests.get is params, so a positional dict would be sent as query-string
# parameters instead of HTTP headers.
requests.get(url, headers=headers, verify=False)