爬虫get请求和post请求学习笔记

最新推荐文章于 2024-03-21 16:36:22 发布

原创最新推荐文章于 2024-03-21 16:36:22 发布 · 157 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#python

Python学习私人笔记专栏收录该内容

64 篇文章

订阅专栏

本文介绍如何使用Python的urllib库进行GET和POST请求，并展示了如何处理超时、状态码及响应头等常见问题。

import urllib.request#获取一个get请求

import urllib.parse   #获取一个post请求

import urllib.error

#获取一个get请求
#代码001：
# headers = {
#     'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'}
# data1 = urllib.request.Request("http://httpbin.org/get", headers=headers)
# data2 = urllib.request.urlopen(data1).read()
# data3 = urllib.request.urlopen(data1)
# data4 = urllib.request.urlopen(data1).read().decode('utf-8') #转码
# print(data2)
# print(data3)
# print(data4)

# 代码002：（未声明headers，会报错）
# response = urllib.request.urlopen("http://www.baidu.com")
# print(response)
# print(response.read())

#获取一个post请求
# data=bytes(urllib.parse.urlencode({"hello":"world"}),encoding="utf-8")
#
# response = urllib.request.urlopen("http://httpbin.org/post",data=data)
# print(response.read().decode("utf-8"))

#get请求,未声明headers会报错
# headers = {
#     'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'}
# data1 = urllib.request.Request("http://httpbin.org/post", headers=headers)
#
# response = urllib.request.Request("http://httpbin.org/get",headers=headers)
# response1= urllib.request.urlopen(response).read()
# print(response1.decode("utf-8"))

#对超时的处理，比如0.01s
#拿上述例子举例
#未使用try/except语句：
# headers = {
#     'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'}
# data1 = urllib.request.Request("http://httpbin.org/post", headers=headers)
#
# response = urllib.request.Request("http://httpbin.org/get", headers=headers)
# response1 = urllib.request.urlopen(response, timeout=0.01)
# # response1 = urllib.request.urlopen(response)
# print(response1.read().decode("utf-8"))

#使用try/except语句：
# import socket    ##引入socket库
# try:
#     headers = {
#         'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'}
#     # data1 = urllib.request.Request("http://httpbin.org/post", headers=headers)
#
#     response = urllib.request.Request("http://httpbin.org/get", headers=headers)
#     response1= urllib.request.urlopen(response, timeout=0.01)
#     print(response1.read().decode("utf-8"))
# except socket.timeout as e:
# # except Exception as e:##正常运行
# # except urllib.error.URLError as e:## 报错socket.timeout: timed out
#     print("time out！")

#返回状态码：
# headers = {
#         'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'}
# # data1 = urllib.request.Request("http://httpbin.org/post", headers=headers)
#
# response = urllib.request.Request("https://movie.douban.com/top250", headers=headers)
# # response = urllib.request.Request("https://movie.douban.com/top250")
# response1 = urllib.request.urlopen(response, timeout=3)
# response2 = urllib.request.urlopen(response, timeout=3).read().decode("utf-8")
#
#
# # print(response1.read().decode("utf-8"))
# # print(response1.status)  # 返回状态码http://httpbin.org/get
# # print(response1.getheaders)##getheaders加不加s、加不加（）区别很大
# # print(response2.getheaders())####AttributeError: 'str' object has no attribute 'getheaders'
# print(response1.getheaders())
# print(response1.getheaders)

# print(response1.getheader("server"))



# Wrap a POST request in a Request object (custom headers + form body).
url = "http://httpbin.org/post"
# urlopen() needs the request body as bytes, so the URL-encoded form string
# ("name=Jack") is encoded to UTF-8 before it is attached to the request.
data = urllib.parse.urlencode({"name": "Jack"}).encode("utf-8")
# The header value is the bare UA string only; the field name must not be
# repeated inside the value, otherwise the server receives a malformed UA.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
    )
}  # dict: header name -> header value
req = urllib.request.Request(url=url, data=data, headers=headers, method="POST")
# The context manager closes the HTTP response once the body has been read.
with urllib.request.urlopen(req) as response:
    print(response.read().decode("utf-8"))