What Is Asynchronous Programming
Asynchronous programming is a paradigm that lets a program continue executing other tasks instead of blocking while it waits for certain operations (such as I/O) to complete. It is especially well suited to handling large numbers of concurrent requests and I/O-bound workloads.
Synchronous vs. Asynchronous
Synchronous programming:
import time

def sync_task():
    print("Task started")
    time.sleep(2)  # Simulate a time-consuming operation
    print("Task finished")

# Run multiple tasks
for i in range(3):
    sync_task()
# Total time: 6 seconds
Asynchronous programming:
import asyncio

async def async_task():
    print("Task started")
    await asyncio.sleep(2)  # Simulate an asynchronous time-consuming operation
    print("Task finished")

# Run multiple tasks concurrently
async def main():
    tasks = [async_task() for _ in range(3)]
    await asyncio.gather(*tasks)

asyncio.run(main())
# Total time: about 2 seconds
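The timing difference is easy to confirm empirically. A minimal sketch that wraps the asynchronous version above with time.perf_counter():

import asyncio
import time

async def async_task():
    await asyncio.sleep(2)

async def main():
    await asyncio.gather(*(async_task() for _ in range(3)))

start = time.perf_counter()
asyncio.run(main())
print(f"Elapsed: {time.perf_counter() - start:.2f}s")  # ~2s, versus ~6s sequentially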
Fundamentals of Asynchronous Programming in Python
Core Keywords
1. async/await
- async def: defines an asynchronous function (a coroutine function)
- await: suspends the coroutine until an asynchronous operation completes
async def fetch_data():
    # Simulate a network request
    await asyncio.sleep(1)
    return "Data fetched"

async def main():
    result = await fetch_data()
    print(result)
2. Coroutines
A coroutine is a function whose execution can be suspended and resumed:
async def my_coroutine():
    print("Coroutine started")
    await asyncio.sleep(1)
    print("Coroutine finished")
    return "result"

# Create a coroutine object (nothing runs yet)
coro = my_coroutine()
# Run the coroutine
result = asyncio.run(coro)
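Note that calling a coroutine function does not execute its body; it only creates a coroutine object, and the body runs when that object is awaited (or handed to asyncio.run()). A coroutine object that is never awaited triggers a "coroutine ... was never awaited" RuntimeWarning. A minimal illustration:

import asyncio

async def my_coroutine():
    print("running")
    return "result"

coro = my_coroutine()      # nothing is printed yet: just a coroutine object
print(type(coro))          # <class 'coroutine'>
print(asyncio.run(coro))   # the body runs now: prints "running", then "result"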
Core asyncio Concepts
1. The Event Loop
The event loop is the heart of asynchronous programming; it schedules and executes coroutines:
import asyncio

# Option 1: asyncio.run() (recommended)
asyncio.run(main())

# Option 2: manage the event loop manually
# (asyncio.get_event_loop() is deprecated outside a running loop since
#  Python 3.10, so create a fresh loop explicitly)
loop = asyncio.new_event_loop()
try:
    loop.run_until_complete(main())
finally:
    loop.close()
2. Tasks
A Task wraps a coroutine so that it can run concurrently with other work:
async def worker(name, delay):
    print(f"Worker {name} started")
    await asyncio.sleep(delay)
    print(f"Worker {name} finished")
    return f"result-{name}"

async def main():
    # Create the tasks (they start running right away)
    task1 = asyncio.create_task(worker("A", 2))
    task2 = asyncio.create_task(worker("B", 1))
    # Wait for all tasks to complete
    results = await asyncio.gather(task1, task2)
    print(f"All results: {results}")
3. Concurrent Execution
asyncio.gather()
async def main():
    # Run several coroutines concurrently
    results = await asyncio.gather(
        fetch_data("API1"),
        fetch_data("API2"),
        fetch_data("API3")
    )
    return results
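By default, gather() propagates the first exception it encounters and the remaining results are lost to the caller. Passing return_exceptions=True collects exceptions alongside normal results instead. A minimal sketch (this fetch_data is a hypothetical stand-in coroutine):

import asyncio

async def fetch_data(name):
    if name == "API2":
        raise RuntimeError(f"{name} failed")
    await asyncio.sleep(0.1)
    return f"data from {name}"

async def main():
    results = await asyncio.gather(
        fetch_data("API1"), fetch_data("API2"), fetch_data("API3"),
        return_exceptions=True,  # exceptions are returned as results, not raised
    )
    for r in results:
        print("error:" if isinstance(r, Exception) else "ok:", r)

asyncio.run(main())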
asyncio.wait()
async def main():
    tasks = [
        asyncio.create_task(worker(f"Worker-{i}", i))
        for i in range(5)
    ]
    done, pending = await asyncio.wait(
        tasks,
        return_when=asyncio.FIRST_COMPLETED
    )
    # Handle the tasks that have completed
    for task in done:
        result = await task
        print(f"Done: {result}")
Practical Asynchronous Programming Techniques
1. Asynchronous HTTP Requests
Use aiohttp for asynchronous HTTP requests:
import aiohttp
import asyncio

async def fetch_url(session, url):
    try:
        async with session.get(url) as response:
            return await response.text()
    except Exception as e:
        return f"Error: {e}"

async def fetch_multiple_urls(urls):
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_url(session, url) for url in urls]
        results = await asyncio.gather(*tasks)
        return results

# Usage example
urls = [
    "https://httpbin.org/delay/1",
    "https://httpbin.org/delay/2",
    "https://httpbin.org/delay/3"
]
results = asyncio.run(fetch_multiple_urls(urls))
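Note that a 4xx/5xx response does not raise by default, so fetch_url above would happily return the body of an error page. Calling response.raise_for_status() converts HTTP error statuses into exceptions; a minimal variant:

import aiohttp

async def fetch_url_strict(session, url):
    try:
        async with session.get(url) as response:
            response.raise_for_status()  # raise on 4xx/5xx status codes
            return await response.text()
    except aiohttp.ClientError as e:
        return f"Error: {e}"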
2. Asynchronous File Operations
Use aiofiles for asynchronous file operations:
import aiofiles
import asyncio

async def read_file_async(filename):
    async with aiofiles.open(filename, 'r', encoding='utf-8') as f:
        content = await f.read()
        return content

async def write_file_async(filename, content):
    async with aiofiles.open(filename, 'w', encoding='utf-8') as f:
        await f.write(content)

async def process_files():
    # Read several files concurrently
    files = ['file1.txt', 'file2.txt', 'file3.txt']
    tasks = [read_file_async(f) for f in files]
    contents = await asyncio.gather(*tasks)
    # Combine the contents and write them to a new file
    processed_content = '\n'.join(contents)
    await write_file_async('output.txt', processed_content)
3. Asynchronous Database Operations
Use asyncpg for asynchronous PostgreSQL access:
import asyncpg
import asyncio

async def fetch_users(pool):
    async with pool.acquire() as connection:
        rows = await connection.fetch("SELECT * FROM users")
        return [dict(row) for row in rows]

async def insert_user(pool, name, email):
    async with pool.acquire() as connection:
        await connection.execute(
            "INSERT INTO users (name, email) VALUES ($1, $2)",
            name, email
        )

async def database_operations():
    # Create a connection pool
    pool = await asyncpg.create_pool(
        "postgresql://user:password@localhost/dbname"
    )
    try:
        # Run the database operations concurrently
        await asyncio.gather(
            insert_user(pool, "Alice", "alice@example.com"),
            insert_user(pool, "Bob", "bob@example.com"),
            insert_user(pool, "Charlie", "charlie@example.com")
        )
        # Fetch all users
        users = await fetch_users(pool)
        print(f"Users: {users}")
    finally:
        await pool.close()
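When related writes must succeed or fail together, asyncpg also provides explicit transactions. A minimal sketch against the same hypothetical users table, using connection.transaction() and executemany():

async def insert_users_atomically(pool, users):
    async with pool.acquire() as connection:
        # Commits on success, rolls back if any statement fails
        async with connection.transaction():
            await connection.executemany(
                "INSERT INTO users (name, email) VALUES ($1, $2)",
                users,  # e.g. [("Alice", "alice@example.com"), ("Bob", "bob@example.com")]
            )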
Performance Optimization and Best Practices
1. Connection Pool Management
import aiohttp
import asyncio

class AsyncHTTPClient:
    def __init__(self):
        self.session = None

    async def __aenter__(self):
        connector = aiohttp.TCPConnector(
            limit=100,          # total connection pool size
            limit_per_host=30,  # connections per host
            ttl_dns_cache=300,  # DNS cache TTL in seconds
            use_dns_cache=True
        )
        timeout = aiohttp.ClientTimeout(total=30)
        self.session = aiohttp.ClientSession(
            connector=connector,
            timeout=timeout
        )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()

    async def get(self, url):
        async with self.session.get(url) as response:
            return await response.json()

# Usage example
async def main():
    async with AsyncHTTPClient() as client:
        tasks = [client.get(f"https://api.example.com/data/{i}")
                 for i in range(100)]
        results = await asyncio.gather(*tasks)
        return results
2. Error Handling and Retry Mechanisms
import asyncio
import random

async def retry_async(
    coro_func,
    max_retries: int = 3,
    delay: float = 1.0,
    backoff_factor: float = 2.0
):
    """Retry an async callable with exponential backoff."""
    for attempt in range(max_retries + 1):
        try:
            return await coro_func()
        except Exception as e:
            if attempt == max_retries:
                raise e
            wait_time = delay * (backoff_factor ** attempt)
            print(f"Retry {attempt + 1}/{max_retries}, waiting {wait_time}s")
            await asyncio.sleep(wait_time)

async def unreliable_api_call():
    """Simulate an unreliable API call."""
    if random.random() < 0.7:  # 70% failure rate
        raise Exception("API call failed")
    return "Data fetched successfully"

async def main():
    try:
        result = await retry_async(unreliable_api_call, max_retries=3)
        print(f"Result: {result}")
    except Exception as e:
        print(f"Failed permanently: {e}")
3. Rate Limiting and Concurrency Control
import asyncio
from asyncio import Semaphore

class RateLimiter:
    def __init__(self, max_concurrent: int = 10):
        self.semaphore = Semaphore(max_concurrent)

    async def __aenter__(self):
        await self.semaphore.acquire()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        self.semaphore.release()

async def limited_task(rate_limiter, task_id):
    async with rate_limiter:
        print(f"Task {task_id} started")
        await asyncio.sleep(1)  # Simulate work
        print(f"Task {task_id} finished")
        return f"result-{task_id}"

async def main():
    rate_limiter = RateLimiter(max_concurrent=3)
    tasks = [
        limited_task(rate_limiter, i)
        for i in range(10)
    ]
    results = await asyncio.gather(*tasks)
    print(f"All results: {results}")
Real-World Application Scenarios
1. Web Scraping
import aiohttp
import asyncio
from bs4 import BeautifulSoup
import time

class AsyncWebScraper:
    def __init__(self, max_concurrent=10):
        self.semaphore = asyncio.Semaphore(max_concurrent)
        self.session = None

    async def __aenter__(self):
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.session:
            await self.session.close()

    async def scrape_page(self, url):
        async with self.semaphore:
            try:
                async with self.session.get(url) as response:
                    html = await response.text()
                    soup = BeautifulSoup(html, 'html.parser')
                    title = soup.find('title')
                    return {
                        'url': url,
                        'title': title.text if title else 'No title',
                        'status': response.status
                    }
            except Exception as e:
                return {'url': url, 'error': str(e)}

    async def scrape_multiple(self, urls):
        tasks = [self.scrape_page(url) for url in urls]
        return await asyncio.gather(*tasks)

# Usage example
async def main():
    urls = [
        'https://example.com',
        'https://httpbin.org',
        'https://github.com',
        # Add more URLs here
    ]
    start_time = time.time()
    async with AsyncWebScraper(max_concurrent=5) as scraper:
        results = await scraper.scrape_multiple(urls)
    end_time = time.time()
    print(f"Scraping finished in {end_time - start_time:.2f}s")
    for result in results:
        print(result)
2. Real-Time Data Processing
import asyncio
from datetime import datetime

class AsyncDataProcessor:
    def __init__(self):
        self.queue = asyncio.Queue()
        self.processed_count = 0

    async def producer(self, data_source):
        """Data producer"""
        for item in data_source:
            await self.queue.put(item)
            await asyncio.sleep(0.1)  # Simulate the interval between arrivals
        # Send the shutdown signal
        await self.queue.put(None)

    async def consumer(self, consumer_id):
        """Data consumer"""
        while True:
            item = await self.queue.get()
            if item is None:
                # Shutdown signal: put it back so the other consumers see it too
                await self.queue.put(None)
                break
            # Process the item
            processed_item = await self.process_item(item, consumer_id)
            self.processed_count += 1
            print(f"Consumer {consumer_id} processed: {processed_item}")
            self.queue.task_done()

    async def process_item(self, item, consumer_id):
        """Process a single item"""
        # Simulate processing time
        await asyncio.sleep(0.5)
        return {
            'original': item,
            'processed_by': consumer_id,
            'processed_at': datetime.now().isoformat(),
            'processed_data': item * 2 if isinstance(item, (int, float)) else f"processed_{item}"
        }

    async def run(self, data_source, num_consumers=3):
        """Run the processing pipeline"""
        # Start the producer
        producer_task = asyncio.create_task(
            self.producer(data_source)
        )
        # Start several consumers
        consumer_tasks = [
            asyncio.create_task(self.consumer(f"consumer_{i}"))
            for i in range(num_consumers)
        ]
        # Wait for everything to finish
        await asyncio.gather(producer_task, *consumer_tasks)
        print(f"Done: processed {self.processed_count} items in total")

# Usage example
async def main():
    processor = AsyncDataProcessor()
    # Simulated data source
    data_source = list(range(1, 21))  # the numbers 1 through 20
    await processor.run(data_source, num_consumers=3)

asyncio.run(main())
Summary
Through the asyncio library, Python's asynchronous programming offers powerful concurrency, especially for I/O-bound tasks. The key points to master:
Key Points
- Understand coroutines: define coroutine functions with async def and wait on asynchronous operations with await
- Use concurrency deliberately: run work concurrently via asyncio.gather(), asyncio.wait(), and friends
- Manage resources: control resource usage with connection pools, semaphores, and similar tools
- Handle errors: implement retry mechanisms and graceful error handling
Best Practices
- Use asynchronous programming for I/O-bound tasks
- Keep concurrency bounded to avoid exhausting resources
- Use connection pools to improve performance
- Implement appropriate error handling and retries
- Avoid blocking operations inside async code (see the sketch after this list)
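When a blocking call (e.g. time.sleep, a synchronous database driver, CPU-heavy parsing) is unavoidable, it can be pushed off the event loop. A minimal sketch using asyncio.to_thread (Python 3.9+), where blocking_io is a hypothetical stand-in:

import asyncio
import time

def blocking_io():
    time.sleep(2)  # a blocking call that would otherwise freeze the event loop
    return "done"

async def main():
    # Run the blocking function in a worker thread so other coroutines keep running
    result, _ = await asyncio.gather(
        asyncio.to_thread(blocking_io),
        asyncio.sleep(0.1),  # still scheduled promptly while blocking_io runs
    )
    print(result)

asyncio.run(main())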
Use Cases
- Web API calls and scraping
- Database operations
- File I/O
- Real-time data processing
- Network service development
With these concepts and techniques in hand, you can write efficient, maintainable asynchronous Python applications.