Python之Requests请求数据

本文深入讲解了Python的Requests库,包括安装、基本用法、GET和POST请求、处理Cookies、超时配置、会话管理、SSL证书验证、使用代理及爬虫应用。适合初学者快速上手并掌握Requests库的各种功能。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

安装

pip install requests

用法

import requests,json
# GET requests ============================================

# `params` supplies the query-string parameters and `headers` supplies the
# request headers. The example is wrapped in a bare triple-quoted string, so
# it is not executed; the comment inside it shows the URL that gets printed.
'''
payload = {'key1':'value1','key2':'value2'}
headers = {'content-type':'application/json'}
r = requests.get('http://httpbin.org/get',params=payload,headers=headers)
print(r.url)
#http://httpbin.org/get?key2=value2&key1=value1
'''
# Request a JSON file: print the raw body text, then the parsed JSON.
# (Disabled example: the snippet is a bare string literal.)
'''
r = requests.get("http://100.39.8.54:8080/a.json")
print(r.text)
print(r.json())
'''
# To get the raw socket response from the server, set stream=True on the
# initial request; `r.raw` can then be read directly (here, the first 10 bytes).
# (Disabled example: the snippet is a bare string literal.)
'''
r = requests.get('https://github.com/timeline.json',stream=True)
print(r.raw)
#<urllib3.response.HTTPResponse object at 0x00000000034ECB70>
print(r.raw.read(10))
#b'{"message"'
'''

# POST requests ===========================================

# Pass a dict via `data` to send the parameters as a form-encoded body.
# (Disabled example: the snippet is a bare string literal.)
'''
payload = {'key1':'value1','key2':'value2'}
r = requests.post("http://httpbin.org/post",data=payload)
print(r.text)
'''
'''
{
 	"args": {}, 
	"data": "", 
	"files": {}, 
	 "form": {
	   "key1": "value1", 
	   "key2": "value2"
	 }, 
	 "headers": {
	   "Accept": "*/*", 
	   "Accept-Encoding": "gzip, deflate", 
	   "Connection":"close",
	   "Content-Length": "23", 
	   "Content-Type": "application/x-www-form-urlencoded", 
	   "Host": "httpbin.org", 
	   "User-Agent": "python-requests/2.18.4"
	 }, 
	 "json": null, 
	 "origin":"124.65.241.202",
	 "url": "http://httpbin.org/post"
}
'''
# Serialize the payload with json.dumps() and pass the resulting string via
# `data` to send it as a JSON body.
# (Disabled example: the snippet is a bare string literal.)
'''
url = 'http://httpbin.org/post'
payload = {'some':'data'}
r = requests.post(url,data=json.dumps(payload))
print(r.text)
'''
'''
{
 "args": {}, 
 "data": "{\"some\": \"data\"}", 
 "files": {}, 
 "form": {}, 
 "headers": {
   "Accept": "*/*", 
   "Accept-Encoding": "gzip, deflate",
   "Connection":"close", 
   "Content-Length": "16", 
   "Host": "httpbin.org", 
   "User-Agent": "python-requests/2.18.4"
 }, 
 "json": {
   "some": "data"
 },  
 "url": "http://httpbin.org/post"
}
'''
# Upload a file with the `files` parameter.
# (Disabled example: the snippet is a bare string literal.)
'''
url = 'http://httpbin.org/post'
# a.txt contains "Hello World!". The context manager closes the file handle
# once the upload has finished instead of leaking it.
with open('a.txt','rb') as f:
    r = requests.post(url,files={'file':f})
print(r.text)
'''
'''
{
 "args": {}, 
 "data": "", 
 "files": {
   "file": "Hello World!"
 }, 
 "form": {}, 
 "headers": {
   "Accept": "*/*", 
   "Accept-Encoding": "gzip, deflate", 
   "Connection":"close", 
   "Content-Length": "153", 
   "Content-Type": "multipart/form-data; boundary=7d8eb5ff99a04c11bb3e862ce78d7000", 
   "Host": "httpbin.org", 
   "User-Agent": "python-requests/2.18.4"
 }, 
 "json": null, 
 "origin":"124.65.241.202",
 "url": "http://httpbin.org/post"
}
'''
# Requests supports streaming uploads, which let you send a large stream of
# data without reading it into memory first. To stream an upload, provide a
# file-like object as the request body.
# (Disabled example: the snippet is a bare string literal.)
'''
# Open the file in binary mode: Requests may compute Content-Length from the
# file's size in bytes, and a text-mode handle can make that value wrong.
with open('massive-body','rb') as f:
    response = requests.post('http://some.url/streamed',data=f)
    print(response.text)
'''

# Cookies =================================================
# Read the cookies returned by a response (`response.cookies`) and convert
# them to a plain dict with requests.utils.dict_from_cookiejar().
# (Disabled example: the snippet is a bare string literal.)
'''
response = requests.get("http://www.baidu.com/")
#返回CookieJar对象
cookiejar = response.cookies
#将CookieJar转为字典
cookiedict = requests.utils.dict_from_cookiejar(cookiejar)
print(cookiejar)
print(cookiedict)
'''
'''
<RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>
{'BDORZ': '27315'}
'''

# Timeouts ================================================
# The `timeout` argument sets the maximum time to wait for the request.
# The original note says it applies only to the connection phase and is
# unrelated to downloading the response body.
# NOTE(review): the Requests docs describe `timeout` as the time to wait for
# the server to send data, not a cap on the total download time - confirm
# against the docs before relying on the wording above.
# NOTE(review): with timeout=0.001 the call presumably raises a timeout
# exception before print(r) is reached - confirm.
# (Disabled example: the snippet is a bare string literal.)
'''
r = requests.get('http://github.com',timeout=0.001)
print(r)
'''

# Session objects =========================================
# Keep state across requests through cookies: the first request on the
# Session sets a cookie, and the second request on the same Session reads the
# cookies back.
# (Disabled example: the snippet is a bare string literal.)
'''
#会话1:
s = requests.Session()
#设置cookies
s.get('http://httpbin.org/cookies/set/sessioncookie/123456789')
#获得cookies
r = s.get("http://httpbin.org/cookies")
print(r.text)
'''
'''
{
  "cookies": {
    "sessioncookie": "123456789"
  }
}
'''
# A Session holds configuration shared by the requests made through it: here
# a header is set on the session via s.headers.update(), and a second header
# is passed for the single request via `headers`.
# (Disabled example: the snippet is a bare string literal.)
'''
s = requests.Session()
s.headers.update({'x-test':'true'})
r = s.get('http://httpbin.org/headers',headers={'x-test2':'true'})
print(r.text)
'''
'''
{
  "headers": {
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate",
    "Connection": "close",
    "Host": "httpbin.org",
    "User-Agent": "python-requests/2.18.4",
    "X-Test": "true",
    "X-Test2": "true"
  }
}
'''
# To leave out one of the session-level values for a particular request, set
# that key to None in the per-request argument.
# (Disabled example: the snippet is a bare string literal.)
'''
s = requests.Session()
s.headers.update({'x-test':'true'})
r = s.get('http://httpbin.org/headers',headers={'x-test':None})
print(r.text)
'''
'''
{
  "headers": {
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate",
    "Connection": "close",
    "Host": "httpbin.org",
    "User-Agent": "python-requests/2.18.4"
  }
}
'''

# SSL certificate verification ============================
# Requests can verify SSL certificates for HTTPS requests, just like a web
# browser. To skip certificate verification for 12306, pass verify=False.
# NOTE(review): presumably only acceptable for a demo - skipping verification
# means the server's identity is not checked.
# (Disabled example: the snippet is a bare string literal.)
'''
r = requests.get('https://kyfw.12306.cn/otn/',verify=False)
print(r.text)
'''
'''
也可以引入Python关于证书的处理模块SSL
1.导入Python SSL处理模块
import ssl
2.表示忽略未经核实的SSL证书认证
context = ssl._create_unverified_context()
'''

# Proxies =================================================
# If you need a proxy, configure it for a single request by passing the
# `proxies` argument to any request method.
# The dict is keyed by protocol ("http" / "https"), so a different proxy can
# be chosen per protocol type.
# (Disabled example: the snippet is a bare string literal.)
'''
proxies = {
  "https":"http://41.118.132.69:4433",
  "http":"http://41.118.132.69:4433"
}
r = requests.post("http://httpbin.org/post",proxies=proxies)
print(r.text)
'''
# Private (authenticated) proxy, which needs a specific URL format.
# If the proxy requires HTTP Basic Auth, embed the credentials in the proxy
# URL as scheme://user:password@host:port. The scheme is part of the required
# format, so it is spelled out explicitly here.
# (Disabled example: the snippet is a bare string literal.)
'''
proxy = {"http": "http://mr_mao_hacker:sffqry9r@61.158.163.130:16816"}
response = requests.get("http://www.baidu.com",proxies = proxy)
print(response.text)
'''
# Web client authentication: pass auth=(username, password) with the request.
# (Disabled example: the snippet is a bare string literal.)
'''
auth=('test','123456')
response = requests.get('http://192.168.199.107',auth = auth)
print(response.text)
'''
# Proxies can also be configured through the HTTP_PROXY and HTTPS_PROXY
# environment variables. These are shell commands, not Python, so they are
# kept inside a bare string literal. A shell assignment must not have spaces
# around "=".
'''
export HTTP_PROXY="http://10.10.1.10:3128"
export HTTPS_PROXY="http://10.10.1.10:1080"
'''

# Fetch the weather data for the city given in the (percent-encoded) `city`
# query parameter and print the response body.
# A timeout is set so the script cannot hang forever if the server never
# answers; without one, requests waits indefinitely.
response = requests.get(
    "http://www.sojson.com/open/api/weather/json.shtml?city=%E9%83%91%E5%B7%9E",
    timeout=10,
)
# Force UTF-8 decoding of the body before reading `response.text`.
response.encoding="utf-8"
print(response.text)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值