urllib 是 Python 标准库自带的 URL 处理包(包含 urllib.request、urllib.parse、urllib.error 等模块),无需额外安装即可发送 HTTP 请求,非常适合初学者快速上手网络请求操作。下面通过实用示例带你掌握它的核心功能。
基础使用
发送 GET 请求
from urllib.request import urlopen
# Send the simplest possible GET request. The `with` block closes the
# response (and its connection) once everything has been read from it.
with urlopen('https://httpbin.org/get') as response:
    # Read the raw body as bytes and decode it for display.
    content = response.read()
    print(f"响应内容:\n{content.decode('utf-8')}")

    # HTTP status code of the response.
    print(f"\n状态码: {response.status}")

    # Response headers, iterated as (name, value) pairs.
    print("\n响应头信息:")
    for header, value in response.getheaders():
        print(f"{header}: {value}")
发送 POST 请求
from urllib.request import urlopen, Request
from urllib.parse import urlencode
# Form fields to submit in the POST body.
data = {
    'username': 'python_user',
    'password': 'secure_password123',
    'action': 'login',
}
# The request body must be bytes: URL-encode the fields into a query-style
# string first, then encode that string as UTF-8.
encoded_data = urlencode(data).encode('utf-8')

# Build the request object describing the target, body and HTTP method.
request = Request(
    url='https://httpbin.org/post',
    data=encoded_data,
    method='POST',
)

# Send the request; the `with` block closes the response after it is read.
with urlopen(request) as response:
    print("POST请求响应:")
    print(response.read().decode('utf-8'))
高级功能
自定义请求头
from urllib.request import Request, urlopen
# Custom headers to send instead of urllib's defaults.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/103.0.0.0',
    'Accept': 'application/json',
    'X-Custom-Header': 'PythonUrllibTutorial',
}

# Attach the headers to a request object.
request = Request(
    url='https://httpbin.org/headers',
    headers=headers,
)

# Send the request; the `with` block closes the response after it is read.
with urlopen(request) as response:
    print("自定义请求头响应:")
    print(response.read().decode('utf-8'))
处理 JSON 响应
import json
from urllib.request import urlopen
# Call an API that returns JSON. The response is closed as soon as the body
# has been parsed; only the resulting dict is needed afterwards.
with urlopen('https://api.github.com/users/python') as response:
    data = json.loads(response.read().decode('utf-8'))

print("\nGitHub用户信息:")
print(f"用户名: {data['login']}")
print(f"名称: {data['name']}")
print(f"粉丝数: {data['followers']}")
print(f"仓库数: {data['public_repos']}")
print(f"个人主页: {data['html_url']}")
异常处理
from urllib.error import HTTPError, URLError
from urllib.request import urlopen
try:
    # Try to access a page that does not exist. Should the request succeed,
    # the `with` block still guarantees the response is closed.
    with urlopen('https://httpbin.org/status/404') as response:
        print(response.read())
except HTTPError as e:
    # HTTPError is a subclass of URLError, so it has to be handled first;
    # otherwise the URLError branch below would catch it.
    print(f"HTTP错误! 状态码: {e.code}")
    print(f"错误信息: {e.reason}")
except URLError as e:
    # Failures that happen before an HTTP response is received.
    print(f"URL错误! 原因: {e.reason}")
except Exception as e:
    # Last-resort handler so the example never ends with a traceback.
    print(f"其他错误: {type(e).__name__}")
使用代理
from urllib.request import ProxyHandler, build_opener
# Configure one proxy per URL scheme (replace these placeholders with a
# working proxy before real use).
proxy = ProxyHandler({
    'http': 'http://proxy.example.com:8080',
    'https': 'https://proxy.example.com:8080',
})

# Build an opener that sends its requests through the proxy handler.
opener = build_opener(proxy)

try:
    # Send the request via the proxy; the `with` block closes the response
    # once its body has been printed.
    with opener.open('https://httpbin.org/ip') as response:
        print("通过代理访问的IP信息:")
        print(response.read().decode('utf-8'))
except Exception as e:
    # The placeholder proxy is not expected to work, so report and move on.
    print(f"代理请求失败: {e}")
实用技巧
下载文件
from urllib.request import urlretrieve
def download_file(url, save_path):
    """Download ``url`` to ``save_path`` while printing the progress.

    Args:
        url: Address of the resource to fetch.
        save_path: Local file path the downloaded content is written to.
    """
    def progress_report(count, block_size, total_size):
        # Called by urlretrieve with the number of blocks transferred so
        # far, the block size in bytes and the total size of the resource.
        downloaded = count * block_size
        if total_size <= 0:
            # Size unknown (-1) or empty resource (0): a percentage cannot
            # be computed, so report the transferred byte count instead.
            print(f"\r已下载: {downloaded} 字节", end='')
            return
        # The final block is usually only partly filled, so the product
        # above can overshoot the real size; cap the display at 100%.
        percent = min(100, downloaded * 100 // total_size)
        print(f"\r下载进度: {percent}%", end='')

    print(f"开始下载: {url}")
    urlretrieve(url, save_path, progress_report)
    print("\n下载完成!")
# Download a sample image. The guard keeps the download from running when
# this file is imported instead of being executed as a script.
if __name__ == "__main__":
    download_file(
        'https://example.com/sample.jpg',  # replace with a real image URL
        'sample_image.jpg',
    )
解析 URL 组件
from urllib.parse import urlparse, parse_qs
url = "https://www.example.com:8080/path/to/page?name=John&age=30#section1"

# Split the URL into its components.
parsed = urlparse(url)

# Emit the heading and every component in one call, one item per line.
print(
    "URL解析结果:",
    f"协议: {parsed.scheme}",
    f"域名: {parsed.hostname}",
    f"端口: {parsed.port}",
    f"路径: {parsed.path}",
    f"查询参数: {parsed.query}",
    f"锚点: {parsed.fragment}",
    sep="\n",
)

# Turn the query string into a dict that maps each key to a list of values.
query_params = parse_qs(parsed.query)

# Show the first value of each parameter, or an empty string when absent.
print(
    "\n查询参数解析:",
    f"name: {query_params.get('name', [''])[0]}",
    f"age: {query_params.get('age', [''])[0]}",
    sep="\n",
)
完整示例:获取天气信息
import json
from urllib.request import urlopen
from urllib.error import URLError
def get_weather(city, api_key="YOUR_API_KEY", timeout=10):
    """Fetch the current weather for ``city``.

    Args:
        city: Name of the city to look up. It is percent-encoded before
            being placed in the URL, so spaces and non-ASCII characters
            are allowed.
        api_key: Value sent as the ``appid`` query parameter. The default
            is a placeholder and must be replaced with a real key.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        A dict with the keys ``city``, ``temp``, ``feels_like``,
        ``humidity``, ``description`` and ``wind_speed``, or ``None`` when
        the request or the parsing of its response fails (a message is
        printed in that case).
    """
    # Imported here so the function does not depend on the surrounding
    # import block.
    from urllib.parse import urlencode

    try:
        # Build the API request URL (replace with a real weather API in a
        # real application). urlencode() escapes every value so that the
        # resulting URL is always valid.
        query = urlencode({
            'q': city,
            'appid': api_key,
            'units': 'metric',
        })
        url = f"https://api.openweathermap.org/data/2.5/weather?{query}"
        with urlopen(url, timeout=timeout) as response:
            data = json.loads(response.read().decode('utf-8'))
        # Pick out the fields of interest from the decoded response.
        weather = {
            'city': data['name'],
            'temp': data['main']['temp'],
            'feels_like': data['main']['feels_like'],
            'humidity': data['main']['humidity'],
            'description': data['weather'][0]['description'],
            'wind_speed': data['wind']['speed'],
        }
        return weather
    except URLError as e:
        # Covers connection problems as well as HTTP error statuses.
        print(f"网络错误: {e.reason}")
        return None
    except Exception as e:
        # Anything else, e.g. a response that lacks the expected fields.
        print(f"发生错误: {type(e).__name__}")
        return None
# Example usage: look up one city and print a short report.
if __name__ == "__main__":
    city = "London"
    weather = get_weather(city)
    if not weather:
        # Nothing usable came back from the lookup.
        print("无法获取天气信息")
    else:
        print(f"\n{city}天气信息:")
        print(f"温度: {weather['temp']}°C (体感 {weather['feels_like']}°C)")
        print(f"湿度: {weather['humidity']}%")
        print(f"天气状况: {weather['description'].capitalize()}")
        print(f"风速: {weather['wind_speed']} m/s")