Message Queue Evaluation

本文档提供了Second Life中消息队列系统的详细评估记录，包括其设计考量、性能指标及应用场景等关键信息。

http://wiki.secondlife.com/wiki/Message_Queue_Evaluation_Notes

#!/usr/bin/python3
# -*- coding:utf-8 -*-
"""
@author:
@file: async_yuyi_comment.py
@time: 2025/8/8 11:36
@desc: Page through the comment-check endpoint with a pool of asyncio workers
       and write the returned comments and their labels to MySQL.
"""
import os
import json
import logging
import asyncio
import aiohttp
import datetime
from configparser import ConfigParser
from sqlalchemy import create_engine, text
from math import ceil

# Database settings are read from <project>/config.ini, section [link_info1].
config = ConfigParser()
current_path = os.path.dirname(os.path.abspath(__file__))
project_path = os.path.dirname(current_path)
config.read(os.path.join(project_path, 'config.ini'))
db_link1 = dict(config.items("link_info1"))
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
engine = create_engine(f'mysql+pymysql://{db_link1["user"]}:{db_link1["password"]}@{db_link1["host"]}:{db_link1["port"]}/{db_link1["database"]}?charset=utf8mb4')


class AsyncPager:
    """Queue-driven pager: page 0 discovers the total, workers drain the rest.

    Each queue item is a page number; ``None`` is the shutdown signal for a
    worker. ``run()`` is the entry point.
    """

    def __init__(self, start_date, end_date, max_concurrency=5, max_retries=3):
        """Store the query window and the concurrency / retry limits.

        Args:
            start_date: first day of the window (needs ``strftime``).
            end_date: last day of the window (needs ``strftime``).
            max_concurrency: maximum number of simultaneous HTTP requests.
            max_retries: attempts per page before the page is given up.
        """
        self.max_concurrency = max_concurrency
        self.queue = asyncio.Queue()
        self.service_host = 'openapi.yuyidata.com'
        self.APP_KEY = "111"
        self.semaphore = asyncio.Semaphore(max_concurrency)
        self.start_date = start_date
        self.end_date = end_date
        self.max_retries = max_retries
        self.session = None  # aiohttp session, set by run()
        self.limit = 100  # page size sent to the API
        self.headers = {"Content-Type": "application/json;charset=UTF-8"}
        self.conn = None  # database connection, set by run()

    async def fetch(self, offset, time_type=1):
        """POST one page request and return the decoded JSON body.

        Args:
            offset: page number; the record offset sent is ``offset * self.limit``.
            time_type: API default is comment time; 0 queries by comment time,
                1 queries by modification time.

        Returns:
            The decoded JSON response, or ``None`` when every attempt failed.
        """
        url = rf'https://{self.service_host}/openapi/v4/comment/check/result'
        data = {
            'appKey': self.APP_KEY,
            'startDate': self.start_date.strftime('%Y-%m-%d') + ' 00:00:00',
            'endDate': self.end_date.strftime('%Y-%m-%d') + ' 23:59:59',
            'timeType': time_type,
            'offset': offset * self.limit,
            'limit': self.limit,
        }
        timeout = aiohttp.ClientTimeout(total=60)
        async with self.semaphore:
            for retry in range(self.max_retries):
                try:
                    async with self.session.post(url, json=data, timeout=timeout) as response:
                        if response.status == 200:
                            return await response.json()
                        logging.warning("%s: HTTP %s (attempt %s/%s)",
                                        url, response.status, retry + 1, self.max_retries)
                except (aiohttp.ClientError, asyncio.TimeoutError, ValueError) as e:
                    # ValueError covers a 200 response whose body is not valid JSON.
                    logging.error("%s: %r", url, e)
                # Exponential backoff between attempts; no pointless sleep after the last one.
                if retry < self.max_retries - 1:
                    await asyncio.sleep(2 ** retry)
        return None

    def insert_database(self, comment_need_data, label_need_data):
        """Write one page of comments and labels in a single transaction.

        Comments are upserted with ``replace into``; the labels of every
        comment in the batch are deleted and then re-inserted.

        Args:
            comment_need_data: list of row dicts for ``rpa_yuyi_voc_comment_text``.
            label_need_data: list of row dicts for ``rpa_yuyi_voc_comment_label_text``.

        Raises:
            Exception: whatever the database layer raises; the transaction is
                rolled back first.
        """
        # Nothing to write for an empty page. Skipping also avoids executing a
        # statement whose bind parameters would have no values.
        if not comment_need_data and not label_need_data:
            return

        sql = 'replace into rpa_yuyi_voc_comment_text (open_id,comment_type,main_comment_id,time,source,shop_name,reviewer_name,source_sku_spec_brand,is_competing_product,source_sku_spec_class_name,source_sku_spec_spec,source_sku_spec_color,source_sku_name,source_product_url,source_product_id,source_sku_id,order_id,sub_order_id,is_effective_evaluation,comment_abstract,comment_length,reply_content,comment_accessory_type,pic_list,video_list,rating) values (:open_id,:comment_type,:main_comment_id,:time,:source,:shop_name,:reviewer_name,:source_sku_spec_brand,:is_competing_product,:source_sku_spec_class_name,:source_sku_spec_spec,:source_sku_spec_color,:source_sku_name,:source_product_url,:source_product_id,:source_sku_id,:order_id,:sub_order_id,:is_effective_evaluation,:comment_abstract,:comment_length,:reply_content,:comment_accessory_type,:pic_list,:video_list,:rating)'
        label_sql = 'insert into rpa_yuyi_voc_comment_label_text (open_id,dimension,id,name,name_en,path,path_en) values (:open_id,:dimension,:id,:name,:name_en,:path,:path_en)'
        del_label_sql = 'delete from rpa_yuyi_voc_comment_label_text where open_id = :open_id'

        try:
            if comment_need_data:
                self.conn.execute(text(sql), comment_need_data)
                # Drop the old labels of every comment in this batch in one call.
                self.conn.execute(
                    text(del_label_sql),
                    [{'open_id': row["open_id"]} for row in comment_need_data],
                )
            if label_need_data:
                self.conn.execute(text(label_sql), label_need_data)
            self.conn.commit()
        except Exception:
            # Leave the shared connection usable for the next page.
            self.conn.rollback()
            raise

    @staticmethod
    def _parse_sku_spec(raw_spec):
        """Return ``sourceSkuSpec`` as a dict; ``{}`` when absent or malformed."""
        if not raw_spec:
            return {}
        if isinstance(raw_spec, dict):
            return raw_spec
        try:
            spec = json.loads(raw_spec)
        except (TypeError, ValueError):
            logging.warning("unparseable sourceSkuSpec: %r", raw_spec)
            return {}
        return spec if isinstance(spec, dict) else {}

    async def parse(self, page_num, result_data):
        """Turn one API response into database rows; page 0 also queues the rest.

        Args:
            page_num: page number the response belongs to.
            result_data: decoded response (reads ``data`` and ``total``), or
                ``None`` when the fetch failed.

        Returns:
            ``(comment_need_data, label_need_data)``: two lists of row dicts,
            both empty when ``result_data`` is not a dict.
        """
        comment_need_data = []
        label_need_data = []
        if not isinstance(result_data, dict):
            logging.error("page %s: no usable response, nothing parsed", page_num)
            return comment_need_data, label_need_data

        for data in result_data.get("data") or []:
            source_sku_spec = self._parse_sku_spec(data.get('sourceSkuSpec'))
            source_sku_spec_brand = source_sku_spec.get('品牌')
            source_sku_spec_spec = source_sku_spec.get('型号')
            source_sku_spec_class_name = source_sku_spec.get('商品品类')
            source_sku_spec_color = source_sku_spec.get('颜色')
            is_competing_product = '是' if source_sku_spec_brand in ['usmile'] else '否'

            is_effective_evaluation = None
            # `or []` also covers keys that are present with a null value.
            for label1 in data.get('labels') or []:
                for label2 in label1.get('labels') or []:
                    if label1.get('dimension') == '是否有效':
                        is_effective_evaluation = label2.get('name')
                    label_need_data.append({
                        'open_id': data.get('openId'),
                        'dimension': label1.get('dimension'),
                        'id': label2.get('id'),
                        'name': label2.get('name'),
                        'name_en': label2.get('nameEn'),
                        'path': label2.get('path'),
                        'path_en': label2.get('pathEn'),
                    })

            comment_need_data.append({
                'open_id': data.get('openId'),
                'comment_type': data.get('commentType'),
                'main_comment_id': data.get('mainCommentId'),
                'time': data.get('time'),
                'source': data.get('source'),
                'shop_name': data.get('shopName'),
                'reviewer_name': data.get('reviewerName'),
                'source_sku_spec_brand': source_sku_spec_brand,
                'is_competing_product': is_competing_product,
                'source_sku_spec_class_name': source_sku_spec_class_name,
                'source_sku_spec_spec': source_sku_spec_spec,
                'source_sku_spec_color': source_sku_spec_color,
                'source_sku_name': data.get('sourceSkuName'),
                'source_product_url': data.get('sourceProductUrl'),
                'source_product_id': data.get('sourceProductId'),
                'source_sku_id': data.get('sourceSkuId'),
                'order_id': data.get('orderId'),
                'sub_order_id': data.get('subOrderId'),
                'is_effective_evaluation': is_effective_evaluation,
                'comment_abstract': data.get('commentAbstract'),
                'comment_length': data.get('commentLength'),
                'reply_content': data.get('replyContent'),
                'comment_accessory_type': data.get('commentAccessoryType'),
                'pic_list': ','.join(data.get('picList') or []),
                'video_list': ','.join(data.get('videoList') or []),
                'rating': data.get('rating'),
            })

        if page_num == 0:
            total_pages = ceil((result_data.get('total') or 0) / self.limit)
            print('总页码:', total_pages)
            # Page 0 (record offset 0) is the response being parsed right now,
            # so only pages 1 .. total_pages - 1 remain. Queuing total_pages
            # itself would request an offset past the last record.
            for p in range(1, total_pages):
                await self.queue.put(p)
        return comment_need_data, label_need_data

    async def worker(self, _):
        """Worker coroutine: take page numbers off the queue and process them.

        ``task_done()`` is called for every item taken, including when the
        page fails. Without that, a single exception kills the worker with the
        item still counted as unfinished, and ``queue.join()`` in ``run()``
        waits forever.

        Args:
            _: worker index, used only in the progress output.
        """
        while True:
            page_num = await self.queue.get()
            try:
                if page_num is None:  # shutdown signal
                    break
                print(f'当前进程{_},当前页码:{page_num}')
                result_data = await self.fetch(page_num)
                if result_data is None:
                    logging.error("page %s: giving up after %s attempts",
                                  page_num, self.max_retries)
                    continue
                comment_need_data, label_need_data = await self.parse(page_num, result_data)
                # NOTE: this is a blocking database call on the event loop; the
                # connection is shared, so it is deliberately not moved to a thread.
                self.insert_database(comment_need_data, label_need_data)
                print(f'当前进程{_},当前页码:{page_num}--完成')
            except Exception:
                # One bad page must not take the worker (and the whole run) down.
                logging.exception("page %s failed", page_num)
            finally:
                self.queue.task_done()

    async def run(self):
        """Open the connection and HTTP session, crawl every page, shut down."""
        # Re-create the loop-bound primitives inside the running loop: before
        # Python 3.10, objects built in __init__ attach to a different loop
        # than the one asyncio.run() creates.
        self.queue = asyncio.Queue()
        self.semaphore = asyncio.Semaphore(self.max_concurrency)

        with engine.connect() as conn:
            self.conn = conn
            async with aiohttp.ClientSession(headers=self.headers) as session:
                self.session = session
                await self.queue.put(0)  # seed task: page 0 discovers the total
                workers = [asyncio.create_task(self.worker(i)) for i in range(10)]
                # Wait until every queued page has been marked done.
                await self.queue.join()
                # One shutdown signal per worker.
                for _ in workers:
                    await self.queue.put(None)
                # Wait for the workers to exit.
                await asyncio.gather(*workers)
        print('所有任务完成')


if __name__ == "__main__":
    s_date = datetime.datetime(2025, 8, 7)
    e_date = datetime.datetime(2025, 8, 7)
    crawler = AsyncPager(s_date, e_date)
    asyncio.run(crawler.run())

# Question asked in the original post: why does this code never finish?
08-11
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值