Requests

Requests 崔老师爬虫系列课程学习笔记

install

pip install requests

examples

import requests

response = requests.get('http://www.baidu.com')
print(type(response))
print(response.status_code)
print(response.text)
print(response.cookies)

# 各种请求方式

import requests
requests.post('http://httpbin.org/post')
requests.put('http://httpbin.org/put')
requests.delete('http://httpbin.org/delete')
requests.head('http://httpbin.org/head')

请求

基本GET请求

import requests
response = requests.get('http://www.baidu.com')
print(response.text)

带参数的GET请求

import requests

response = requests.get('http://httpbin.org/get?name=germey&age=22')
print(response.text)
import requests

data = {
    'name':'germey',
    'age':22
}
response = requests.get('http://httpbin.org/get',params=data)
print(response.text)

解析JSON

import requests
import json

response = requests.get('http://httpbin.org/get')
print(response.json())
print(json.loads(response.text))

获取二进制类型

import requests

# Download the image; response.content holds the raw body bytes.
response = requests.get('http://seopic.699pic.com/photo/00013/4041.jpg_wh1200.jpg')

# The with-statement closes the file on exit, so no explicit close() is needed.
with open('祈福.jpg','wb') as f:
    f.write(response.content)

添加Headers

import requests

response = requests.get('http://www.zhihu.com/explore')
print(response.text)
import requests

headers = {

    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'
}
response = requests.get('http://www.zhihu.com/explore',headers = headers)
print(response.text)

基本POST请求

import requests

# Form fields sent in the request body.
data = {'name':'germey','age':22}
headers = {

    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'
}
# Send the custom User-Agent together with the form data.
response = requests.post('http://httpbin.org/post', data=data, headers=headers)
print(response.text)

响应

response属性

import requests
response = requests.get('http://www.baidu.com')
print(type(response.status_code),response.status_code)
print(type(response.cookies),response.cookies)
print(type(response.headers),response.headers)
print(type(response.url),response.url)
print(type(response.history),response.history)

状态码的判断

import requests

response = requests.get('http://www.jianshu.com/hello.html')
# requests.codes.not_found is the named constant for HTTP 404.
if response.status_code == requests.codes.not_found:
    print('404 NOTFOUND')

高级操作

文件上传

import requests

# Open the file in a with-block so the handle is closed once the upload is done.
with open("祈福.jpg",'rb') as f:
    files = {"files": f}
    response = requests.post('http://httpbin.org/post',files=files)
print(response.text)

获取Cookie

import requests

response = requests.get('http://www.baidu.com')
jar = response.cookies
print(jar)
# Print every cookie as name=value.
for name, val in jar.items():
    print('='.join((name, val)))

会话维持

模拟登录

import requests

# A Session carries cookies from one request to the next; using it as a
# context manager closes it when the block exits.
with requests.Session() as s:
    s.get('http://httpbin.org/cookies/set/number/123456789')
    response = s.get('http://httpbin.org/cookies')
print(response.text)

证书验证

import requests
from requests.packages import urllib3
urllib3.disable_warnings()  # suppress warning messages

# Passing verify=False avoids failures on sites whose certificate is not valid.
response = requests.get('https://www.12306.cn',verify=False)
print(response.status_code)
import requests

# Manually supply a local certificate as a (cert file, key file) pair.
response = requests.get('https://www.12306.cn',cert=('/path/server.crt','/path/key'))
print(response.status_code)

代理设置

import requests
proxies = {
    "http":"http://127.0.0.1:51507",
    "https":"https://127.0.0.1:51507"
}

response = requests.get("http://www.baidu.com",proxies=proxies)
print(response.status_code)
输出:200
import requests

proxies={    
    "http":"socks5://127.0.0.1:51507",
    "https":"socks5://127.0.0.1:51507"
}
response = requests.get("https://www.taobao.com",proxies=proxies)
print(response.status_code)

超时设置

import requests

response = requests.get("http://www.taobao.com",timeout=1)
print(response.status_code)
输出:200

认证设置

import requests
from requests.auth import HTTPBasicAuth

# auth= must be an argument of requests.get itself.
# NOTE: avoid hard-coding real credentials; load them from configuration instead.
response = requests.get('http://www.++++++++++.com', auth=HTTPBasicAuth('13026156724','zhou3210'))
print(response.text)
import requests

# A plain (username, password) tuple is shorthand for HTTP Basic authentication.
response = requests.get('http://www.++++++++++.com', auth=('13026156724','zhou3210'))
print(response.text)

异常处理

import requests
from requests.exceptions import HTTPError,ConnectionError,ReadTimeout,RequestException

try:
    # The tiny timeout is there to provoke a timeout error for the demo.
    response = requests.get('http://www.baidu.com',timeout=0.01)
    # requests.get does not raise on 4xx/5xx responses by itself;
    # raise_for_status() is what makes the HTTPError handler reachable.
    response.raise_for_status()
except HTTPError:
    print('HTTPError')
except ConnectionError:
    print('ConnectionError')
except ReadTimeout:
    print('ReadTimeout')
except RequestException:
    # Base class of all requests exceptions: catches anything not handled above.
    print('RequestException')
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值