async with semaphore

This article shows how to limit how many Kafka messages are processed concurrently in Python using asyncio and the confluent_kafka library: an asyncio.Semaphore bounds the number of in-flight processing tasks, so thread-pool usage stays under control while messages are handled.
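
To see the mechanism in isolation, here is a minimal, Kafka-free sketch: ten tasks are all scheduled up front, but asyncio.Semaphore(3) lets only three of them past the async with line at a time, so they finish in waves of three.

import asyncio

async def job(n, sem):
    async with sem:
        # At most 3 jobs can be inside this block at once
        print(f"job {n} started")
        await asyncio.sleep(1)
        print(f"job {n} finished")

async def main():
    sem = asyncio.Semaphore(3)
    # All 10 tasks are created immediately; the semaphore is what throttles them
    await asyncio.gather(*(job(n, sem) for n in range(10)))

asyncio.run(main())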


import asyncio
import time

import confluent_kafka

# Suppose we want to limit the number of concurrently processed messages to 5
semaphore = asyncio.Semaphore(5)

def handle_message(msg):
    # The actual (potentially blocking) message handling; runs in a worker thread
    print(f"Processing message: {msg.value().decode('utf-8')}")
    time.sleep(1)  # pretend the processing takes some time

async def process_message_in_thread(msg):
    async with semaphore:
        # Hand the blocking work to the default thread pool; the semaphore
        # ensures at most 5 messages are being processed at any moment
        await asyncio.to_thread(handle_message, msg)

async def consume_messages():
    # Kafka configuration and consumer creation... (placeholder settings)
    c = confluent_kafka.Consumer({"bootstrap.servers": "localhost:9092",
                                  "group.id": "demo"})
    c.subscribe(["my-topic"])  # hypothetical topic name

    try:
        while True:
            # poll() itself also blocks, so run it in a worker thread too
            msg = await asyncio.to_thread(c.poll, 1.0)
            if msg is None:
                continue
            if msg.error():
                # Handle the error...
                continue

            # Submit message processing as a concurrent task; the semaphore
            # keeps the number of in-flight messages (and threads) at 5
            asyncio.create_task(process_message_in_thread(msg))
    finally:
        # Cleanup...
        c.close()

# Create and run the consumer task
asyncio.run(consume_messages())
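
One caveat about the loop above: asyncio.create_task only schedules the coroutine and returns immediately, so nothing here waits for outstanding tasks before c.close() runs, and tasks that nobody holds a reference to can in principle be garbage-collected mid-flight. In a production consumer you would keep the task references (for example in a set, or via asyncio.TaskGroup on Python 3.11+) and await them during shutdown.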

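A longer example of the same pattern, this time without Kafka: the script below fans out LLM inference requests through the volcenginesdkarkruntime AsyncArk client, and an asyncio.Semaphore(CONCURRENT_LIMIT) caps how many requests are in flight at once while failures are retried with exponential backoff.
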
# -*- coding:utf-8 -*-
import os
import pandas as pd
import asyncio
import sys
from datetime import datetime
import uvloop
from volcenginesdkarkruntime import AsyncArk

# Read the input CSV
check_wfy = pd.read_csv('/data3/users/duant/2025-3-19/have.csv')
output_path = '/data3/users/zhuominli/语言模型探索/推理任务/wfy'

# Read the prompt file
with open('/data3/users/zhuominli/语言模型探索/推理任务/prompt.txt', 'r', encoding='utf-8') as f:
    prompt = f.read()

async def worker(worker_id, task_num, conditions, results):
    client = AsyncArk()
    print(f"Worker {worker_id} is starting.")
    for i in range(task_num):
        if i >= len(conditions):
            break
        print(f"Worker {worker_id} task {i} is running.")
        try:
            completion = await client.batch_chat.completions.create(
                model="ep-bi-20250319172923-vzttd",
                messages=[
                    {"role": "system", "content": prompt},
                    {"role": "user", "content": str(conditions[i])},
                ],
                temperature=0.6
            )
            results[i] = completion.choices[0].message.content
        except Exception as e:
            print(f"Worker {worker_id} task {i} failed with error: {e}")
        else:
            print(f"Worker {worker_id} task {i} is completed.")
        # Display progress
        print(f"Worker {worker_id} progress: {i+1}/{task_num} tasks completed.")
    print(f"Worker {worker_id} is completed.")

async def main():
    start = datetime.now()
    # Core parameters
    CONCURRENT_LIMIT = 1000   # concurrent requests
    BATCH_SIZE = 5000         # items per batch
    MAX_RETRIES = 3           # maximum retries
    TIMEOUT = 30              # per-request timeout (seconds)

    # Initialize the shared client
    client = AsyncArk()

    # Process in batches
    conditions = check_wfy['EXAM_FINDING'].tolist()
    total_batches = (len(conditions) + BATCH_SIZE - 1) // BATCH_SIZE
    for batch_idx in range(0, len(conditions), BATCH_SIZE):
        batch_cond = conditions[batch_idx:batch_idx+BATCH_SIZE]
        results = [None] * len(batch_cond)
        semaphore = asyncio.Semaphore(CONCURRENT_LIMIT)

        async def process_item(i):
            async with semaphore:
                for retry in range(MAX_RETRIES):
                    try:
                        completion = await client.batch_chat.completions.create(
                            model="ep-bi-20250319172923-vzttd",
                            messages=[
                                {"role": "system", "content": prompt},
                                {"role": "user", "content": str(batch_cond[i])},
                            ],
                            temperature=0.6
                            # Removed the request_timeout parameter that was causing the error
                        )
                        results[i] = completion.choices[0].message.content
                        return
                    except Exception as e:
                        if retry == MAX_RETRIES - 1:
                            results[i] = f"Failed after {MAX_RETRIES} retries: {str(e)}"
                        await asyncio.sleep(2 ** retry)

        tasks = [process_item(i) for i in range(len(batch_cond))]
        await asyncio.gather(*tasks)

        # Save each batch's results immediately
        output_file = os.path.join(output_path, 'results.csv')
        pd.DataFrame({'condition': batch_cond, 'ds_v3': results})\
            .to_csv(output_file, mode='a', header=not os.path.exists(output_file))

        # Display batch progress
        print(f"Batch {batch_idx // BATCH_SIZE + 1}/{total_batches} completed.")

    print(f"Total time: {datetime.now() - start}")

if __name__ == "__main__":
    if sys.version_info >= (3, 11):
        with asyncio.Runner(loop_factory=uvloop.new_event_loop) as runner:
            runner.run(main())
    else:
        uvloop.install()
        asyncio.run(main())
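
A few points worth noting about that script: the semaphore is created per batch, so of the up-to-BATCH_SIZE tasks launched by asyncio.gather, only CONCURRENT_LIMIT hold an open request at any moment; failed calls back off for 2 ** retry seconds before retrying; and each batch is appended to results.csv as soon as it finishes, so partial progress survives an interruption. The worker coroutine and the TIMEOUT constant are defined but never used by main().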