爬虫get请求和post请求学习笔记

本文介绍如何使用Python的urllib库进行GET和POST请求,并展示了如何处理超时、状态码及响应头等常见问题。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

import urllib.request#获取一个get请求

import urllib.parse   #获取一个pos请求

import urllib.error

#获取一个get请求
#代码001:
# headers = {
#     'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'}
# data1 = urllib.request.Request("http://httpbin.org/post", headers=headers)
# data2 = urllib.request.urlopen(data1).read()
# data3 = urllib.request.urlopen(data1)
# data4 = urllib.request.urlopen(data1).read().decode('utf-8') #转码
# print(data2)
# print(data3)
# print(data4)

# 代码002:(未声明headers,会报错)
# response = urllib.request.urlopen("http://www.baidu.com")
# print(response)
# print(response.read())

#获取一个post请求
# data=bytes(urllib.parse.urlencode({"hello":"world"}),encoding="utf-8")
#
# response = urllib.request.urlopen("http://httpbin.org/post",data=data)
# print(response.read().decode("utf-8"))

#get请求,未声明headers会报错
# headers = {
#     'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'}
# data1 = urllib.request.Request("http://httpbin.org/post", headers=headers)
#
# response = urllib.request.Request("http://httpbin.org/get",headers=headers)
# response1= urllib.request.urlopen(response).read()
# print(response1.decode("utf-8"))

#对超时的处理,比如0.01s
#拿上述例子举例
#未使用try/except语句:
# headers = {
#     'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'}
# data1 = urllib.request.Request("http://httpbin.org/post", headers=headers)
#
# response = urllib.request.Request("http://httpbin.org/get", headers=headers)
# response1 = urllib.request.urlopen(response, timeout=0.01)
# # response1 = urllib.request.urlopen(response)
# print(response1.read().decode("utf-8"))

#使用try/except语句:
# import socket    ##引入socket库
# try:
#     headers = {
#         'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'}
#     # data1 = urllib.request.Request("http://httpbin.org/post", headers=headers)
#
#     response = urllib.request.Request("http://httpbin.org/get", headers=headers)
#     response1= urllib.request.urlopen(response, timeout=0.01)
#     print(response1.read().decode("utf-8"))
# except socket.timeout as e:
# # except Exception as e:##正常运行
# # except urllib.error.URLError as e:## 报错socket.timeout: timed out
#     print("time out!")

#返回状态码:
# headers = {
#         'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'}
# # data1 = urllib.request.Request("http://httpbin.org/post", headers=headers)
#
# response = urllib.request.Request("https://movie.douban.com/top250", headers=headers)
# # response = urllib.request.Request("https://movie.douban.com/top250")
# response1 = urllib.request.urlopen(response, timeout=3)
# response2 = urllib.request.urlopen(response, timeout=3).read().decode("utf-8")
#
#
# # print(response1.read().decode("utf-8"))
# # print(response1.status)  # 返回状态码http://httpbin.org/get
# # print(response1.getheaders)##getheaders加不加s、加不加()区别很大
# # print(response2.getheaders())####AttributeError: 'str' object has no attribute 'getheaders'
# print(response1.getheaders())
# print(response1.getheaders)

# print(response1.getheader("server"))



# Build an explicit POST request with urllib.request.Request and print the
# decoded response body.
url = "http://httpbin.org/post"
# urlencode() turns the dict into the form string "name=Jack"; the request
# body must be bytes, so the string is encoded as UTF-8 before sending.
data = urllib.parse.urlencode({"name": "Jack"}).encode("utf-8")
# A header value must not repeat the header name: the previous value began
# with a stray "User-Agent: " prefix, which sent a malformed User-Agent.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
    )
}  # headers are passed as a dict
req = urllib.request.Request(url=url, data=data, headers=headers, method="POST")
# The context manager closes the connection once the body has been read.
with urllib.request.urlopen(req) as response:
    print(response.read().decode("utf-8"))
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值