urllib模块使用

本文详细介绍了如何使用Python的urllib.request库进行HTTP请求,包括GET和POST方法,设置超时,处理响应类型、状态码、响应头以及使用ProxyHandler进行代理设置。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

urllib.request.urlopen(url, data=None, [timeout, ]*, cafile=None, capath=None, cadefault=False, context=None)

import urllib.request

# Example 1: fetch a URL and print the raw payload; read() returns bytes.
url = 'http://httpbin.org/ip'
# The response is used as a context manager so the underlying connection is
# closed as soon as the body has been read.
with urllib.request.urlopen(url) as response:
    html = response.read()
print(html)

# Example 2: fetch a page and convert the bytes payload to str with decode().
url = 'http://www.baidu.com'
with urllib.request.urlopen(url) as response:
    html = response.read().decode('utf-8')
print(html)
发送POST数据
import urllib.request
import urllib.parse

url = 'http://httpbin.org/post'

# Form fields to submit with the request.
data = {
    'name': "小明",
    'age': 30,
}
# urlencode() returns a str, but urlopen() rejects str bodies with
# "POST data should be bytes, an iterable of bytes, or a file object.
# It cannot be of type str", so the encoded form must be converted to bytes.
data = urllib.parse.urlencode(data).encode('utf-8')
# Passing data= sends the request body; the context manager closes the
# connection once the response has been read.
with urllib.request.urlopen(url, data=data) as response:
    html = response.read().decode('utf-8')
print(html)
设置timeout
import urllib.request

url = 'http://httpbin.org/get'
# timeout is given in seconds and applies to the blocking network operations;
# the context manager closes the connection once the body has been read.
with urllib.request.urlopen(url, timeout=1) as response:
    html = response.read().decode('utf-8')
print(html)
import socket
import urllib.request
import urllib.error

url = 'http://httpbin.org/get'
try:
    # A deliberately tiny timeout (seconds) to provoke a timeout error.
    with urllib.request.urlopen(url, timeout=0.1) as response:
        html = response.read().decode('utf-8')
except (urllib.error.URLError, socket.timeout) as e:
    # A timeout while connecting is wrapped in URLError (available as
    # e.reason), whereas a timeout while waiting for or reading the response
    # is raised as a bare socket.timeout; normalise both to one "reason".
    reason = getattr(e, 'reason', e)
    print("捕获异常....")
    print(reason)
    if isinstance(reason, socket.timeout):
        print("请求超时")
else:
    # Only reached when the request and the read both succeeded.
    print(html)

响应

响应类型、状态码、响应头、实际获取的url
import urllib.request

url = 'http://www.python.org'
# The context manager guarantees the connection is closed after inspection.
with urllib.request.urlopen(url) as response:
    # Response type
    response_type = type(response)
    print(response_type)  # <class 'http.client.HTTPResponse'>
    # Status code (the `status` attribute replaces the deprecated getcode())
    status_code = response.status
    print(status_code)
    # Reason phrase for the status code
    status = response.reason
    print(status)    # e.g. OK for 200, Not Found for 404
    # Response headers
    response_headers = response.getheaders()  # list of (name, value) tuples
    print(response_headers)
    # getheader() returns the value of one named response header
    server_type = response.getheader('Server')
    print(server_type)
    print(type(response.headers))  # <class 'http.client.HTTPMessage'>
    content_type = response.headers['Content-Type']  # read Content-Type
    print(content_type)
    # URL actually retrieved; compare with `url` to detect a redirect
    # (the `url` attribute replaces the deprecated geturl())
    actual_url = response.url
    print(actual_url)

 

ProxyHandler(代理)

import urllib.request

# Dict mapping protocol type -> proxy address (ip:port).
proxy_dict = {
    # Per the original author: the proxy must be written as
    # http://ip:port; dropping the leading scheme causes an error.
    'http': 'http://127.0.0.1:6688',
    'https': 'https://127.0.0.1:6688',
}

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.109 Safari/537.36',
}


proxy_handler = urllib.request.ProxyHandler(proxy_dict)
opener = urllib.request.build_opener(proxy_handler)

# Set the request headers. addheaders is expected to be a list of
# (name, value) tuples, so materialise the dict view into a real list and
# configure the opener fully before installing it globally.
opener.addheaders = list(headers.items())

# After this call every urllib.request.urlopen() goes through `opener`.
urllib.request.install_opener(opener)

url = 'http://www.whatismyip.com.tw/'  # target domain or IP reached via the proxy
# The context manager closes the connection once the body has been read.
with urllib.request.urlopen(url) as response:
    print(response.read().decode('utf-8'))
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值