need

好久以前改的代码,都快一年了。今天跑竟然出问题了。

原来是opensns中每个模拟转发或者模拟发贴页面的index首页,也就是idrepost.html、idpost.html等页面中的一个引用出错了。

        <!-- End of the filter section -->
        <div id="top_list" >
            <!-- ThinkPHP/OpenSNS volist tag: iterates template variable "top_list",
                 binding each element to $top, and renders the pinned weibo through
                 the WeiboDetail1/detail1 widget (can_hide=0 keeps it visible). -->
            <volist name="top_list" id="top">
                {:W('WeiboDetail1/detail1',array('weibo_id'=>$top,'can_hide'=>0))}
            </volist>
        </div>
        



def get_connections_key(item):
    """MD5 fingerprint of a connecting (two-segment) fare record.

    Falls back to ``costPrice`` when ``native_price`` is absent.
    All participating fields are assumed to be strings (they are concatenated
    directly) -- TODO confirm against the upstream scraper.
    """
    native_price = item.get('native_price', item['costPrice'])
    combined_string = (
        item['policyType'] + item['dep'] + item['transferAirport'] + item['arr']
        + item['depDate'] + item['depDate2'] + item['flightNo'] + item['flightNo2']
        + item['cabin'] + item['costPrice'] + native_price + item['seatNum']
    )
    return hashlib.md5(combined_string.encode()).hexdigest()


def get_key(item):
    """MD5 fingerprint of a single-segment fare record (price-sensitive)."""
    native_price = item.get('native_price', item['costPrice'])
    combined_string = (
        item['policyType'] + item['dep'] + item['arr'] + item['depDate']
        + item['flightNo'] + item['cabin'] + item['costPrice']
        + native_price + item['seatNum']
    )
    return hashlib.md5(combined_string.encode()).hexdigest()


def _classify_by_md5(old_map, new_map):
    """Diff two ``md5 -> record`` maps.

    Returns ``(invalidate_ids, touch_ids, to_insert)``:
    records only in *old_map* are to be invalidated, records present in both
    get their timestamp refreshed, records only in *new_map* are inserted.
    """
    invalidate_ids = [
        item['_id'] for md5, item in old_map.items() if md5 not in new_map
    ]
    touch_ids = []
    to_insert = []
    for md5, new_item in new_map.items():
        if md5 in old_map:
            touch_ids.append(old_map[md5]['_id'])
        else:
            to_insert.append(new_item)
    return invalidate_ids, touch_ids, to_insert


def _duplicate_ids(records, key_fn):
    """Return the ``_id`` of every record whose key repeats (all but the first)."""
    seen = {}
    dup_ids = []
    for item in records:
        key = key_fn(item)
        if key in seen:
            dup_ids.append(item['_id'])
        else:
            seen[key] = item
    return dup_ids


def _insert_docs(client, collection, docs, log_docs=False):
    """Insert *docs*; return True on success. Optionally log the payload."""
    try:
        result = client.insert_many(collect_name=collection, documents=docs)
        if log_docs:
            logger.info(f"插入{len(result.inserted_ids)}条新数据,{docs}")
        else:
            logger.info(f"插入{len(result.inserted_ids)}条新数据")
        return True
    except Exception as e:
        logger.error(f"【{collection}】插入数据失败: {str(e)}")
        return False


def _mark_invalid(client, collection, ids, time_field):
    """Set ``status=1`` (invalid) and refresh *time_field* on the given ids."""
    try:
        update_result = client.update_many(
            collect_name=collection,
            filter={'_id': {'$in': ids}},
            update={"$set": {"status": 1, time_field: datetime.now()}}
        )
        logger.info(f"标记{update_result.matched_count}条数据为无效")
        return True
    except Exception as e:
        logger.error(f"【{collection}】标记无效数据失败: {str(e)}")
        return False


def _touch_update_time(client, collection, ids, time_field):
    """Refresh *time_field* on the given ids; return True on success."""
    try:
        update_result = client.update_many(
            collect_name=collection,
            filter={'_id': {'$in': ids}},
            update={"$set": {time_field: datetime.now()}}
        )
        logger.info(f"更新{update_result.matched_count}条数据时间戳")
        return True
    except Exception as e:
        logger.error(f"更新时间戳失败: {str(e)}")
        return False


def save(old_data, new_data, database_address, database_name):
    """Incrementally sync *new_data* into *database_name*, keyed by get_key().

    New fingerprints are inserted; fingerprints that vanished are marked
    invalid (``status=1``, ``updateTime`` refreshed). Returns True on success.
    """
    try:
        old_map = {get_key(item): item for item in old_data}
        new_map = {get_key(item): item for item in new_data}
        invalidate_ids, _touch_ids, to_insert = _classify_by_md5(old_map, new_map)
        success_flag = True
        if to_insert:
            success_flag = _insert_docs(database_address, database_name, to_insert)
        # Timestamp refresh of unchanged records is deliberately disabled here.
        if invalidate_ids and success_flag:
            success_flag = _mark_invalid(
                database_address, database_name, invalidate_ids, "updateTime")
        return success_flag
    except Exception as e:
        logger.error(f"【{database_name}】增量保存异常: {str(e)}", exc_info=True)
        return False


def saveConnections(old_data, new_data, database_address, database_name):
    """Incremental sync for connecting fares, keyed by get_connections_key().

    Unlike :func:`save`, unchanged records also get ``update_time`` refreshed,
    and this collection uses ``update_time`` (snake_case) throughout.
    """
    try:
        old_map = {get_connections_key(item): item for item in old_data}
        new_map = {get_connections_key(item): item for item in new_data}
        invalidate_ids, touch_ids, to_insert = _classify_by_md5(old_map, new_map)
        success_flag = True
        if to_insert:
            success_flag = _insert_docs(database_address, database_name, to_insert)
        if touch_ids and success_flag:
            if not _touch_update_time(
                    database_address, database_name, touch_ids, "update_time"):
                success_flag = False
        if invalidate_ids and success_flag:
            success_flag = _mark_invalid(
                database_address, database_name, invalidate_ids, "update_time")
        return success_flag
    except Exception as e:
        logger.error(f"【{database_name}】增量保存异常: {str(e)}", exc_info=True)
        return False


def get_key_intel(item):
    """MD5 fingerprint of an intel fare record (uses price + totalTax)."""
    combined_string = (
        item['policyType'] + item['dep'] + item['arr'] + item['depDate']
        + item['flightNo'] + item['cabin'] + item['price']
        + item['totalTax'] + item['seatNum']
    )
    return hashlib.md5(combined_string.encode()).hexdigest()


def saveIntel(old_data, new_data, database_address, database_name):
    """Incremental sync for intel fares, keyed by get_key_intel().

    Stored records with a duplicated fingerprint are invalidated as well.
    NOTE(review): invalidation runs even when the insert step failed (the
    sibling functions gate it on success) -- kept as-is, confirm intent.
    """
    try:
        invalidate_ids = _duplicate_ids(old_data, get_key_intel)
        old_map = {get_key_intel(item): item for item in old_data}
        new_map = {get_key_intel(item): item for item in new_data}
        vanished_ids, _touch_ids, to_insert = _classify_by_md5(old_map, new_map)
        invalidate_ids += vanished_ids
        success_flag = True
        if to_insert:
            success_flag = _insert_docs(
                database_address, database_name, to_insert, log_docs=True)
        if invalidate_ids:
            if not _mark_invalid(
                    database_address, database_name, invalidate_ids, "updateTime"):
                success_flag = False
        return success_flag
    except Exception as e:
        logger.error(f"【{database_name}】增量保存异常: {str(e)}", exc_info=True)
        return False


def get_key_intel_new(item):
    """MD5 fingerprint of an intel fare, also covering its segment list."""
    policy_segment_str = ''.join(str(segment) for segment in item['policySegmentList'])
    combined_string = (
        item['policyType'] + item['dep'] + item['arr'] + item['depDate']
        + item['flightNo'] + item['cabin'] + item['price']
        + item['totalTax'] + item['seatNum'] + policy_segment_str
    )
    return hashlib.md5(combined_string.encode()).hexdigest()


def get_key_new(item):
    """MD5 fingerprint of a flight identity only (no price fields)."""
    combined_string = (
        item['policyType'] + item['dep'] + item['arr'] + item['depDate']
        + item['flightNo'] + item['cabin']
    )
    return hashlib.md5(combined_string.encode()).hexdigest()


def get_price_key(item):
    """MD5 fingerprint of the price triple (price, totalTax, totalPrice)."""
    combined_string = item['price'] + item['totalTax'] + item['totalPrice']
    return hashlib.md5(combined_string.encode()).hexdigest()


def filter_upd_data(ins, item):
    """Return *item* when it is the same flight as *ins* with the same price,
    i.e. a record whose timestamp should be refreshed; otherwise None."""
    if get_key_new(ins) == get_key_new(item) and get_price_key(ins) == get_price_key(item):
        return item
    return None


def filter_invalid_data(ins, item):
    """Return *item* when it is the same flight as *ins* but the price
    changed, i.e. a record that must be invalidated; otherwise None."""
    if get_key_new(ins) == get_key_new(item) and get_price_key(ins) != get_price_key(item):
        return item
    return None


def update_mongo_data_new(mgClient, need_ins_list, HBGJ_PRICE_DATA, dep, arr,
                          dep_date, task_tag):
    """Locked incremental sync of one route/date against the live collection.

    Queries the currently valid (``status=0``) fares for the route, diffs them
    against *need_ins_list* by get_key(), inserts new fares and invalidates
    vanished ones. Re-raises on unexpected errors; the lock is always released.
    """
    with mongo_update_lock:
        try:
            temp_res_data = list(mgClient.find(
                collect_name=HBGJ_PRICE_DATA,
                filter={'policyType': task_tag, 'dep_tag': dep, 'arr_tag': arr,
                        'depDate': dep_date, 'status': 0}
            ))
            old_map = {get_key(item): item for item in temp_res_data}
            new_map = {get_key(item): item for item in need_ins_list}
            invalidate_ids, _touch_ids, to_insert = _classify_by_md5(old_map, new_map)
            success_flag = True
            if to_insert:
                success_flag = _insert_docs(
                    mgClient, HBGJ_PRICE_DATA, to_insert, log_docs=True)
            if invalidate_ids and success_flag:
                success_flag = _mark_invalid(
                    mgClient, HBGJ_PRICE_DATA, invalidate_ids, "updateTime")
            return success_flag
        except Exception as e:
            logger.info(f"更新MongoDB数据时发生异常: {str(e)}")
            raise


def update_mongo_data(mgClient, need_ins_list, temp_res_data, HBGJ_PRICE_DATA):
    """Legacy sync: insert new fares, invalidate fares that disappeared.

    Returns 1 when anything was inserted, else 0. When several stored
    documents share one fingerprint, only the first occurrence is kept and
    the later duplicates are invalidated (historical behaviour, preserved).
    """
    unmatched_new_md5s = {get_key(ins) for ins in need_ins_list}
    need_upd_data = []  # stored docs to mark invalid
    for item in temp_res_data:
        item_md5 = get_key(item)
        if item_md5 in unmatched_new_md5s:
            # Fingerprint already stored: drop every copy from the insert list.
            unmatched_new_md5s.discard(item_md5)
            need_ins_list = [ins for ins in need_ins_list if get_key(ins) != item_md5]
        else:
            need_upd_data.append(item)
    is_data_flag = 0
    if need_ins_list:
        is_data_flag = 1
        mgClient.insert_many(collect_name=HBGJ_PRICE_DATA, documents=need_ins_list)
        logger.info(f"【插入有效数据】: 长度{len(need_ins_list)}")
    upd_id_list = [data['_id'] for data in need_upd_data]
    if upd_id_list:
        filter_criteria = {'_id': {'$in': upd_id_list}}
        res = mgClient.update_many(
            collect_name=HBGJ_PRICE_DATA,
            filter=filter_criteria,
            update={"$set": {"status": 1, "update_time": datetime.now()}}
        )
        logger.info(f"【更新无效数据】: 长度{res.matched_count}")
    return is_data_flag


def saveIntelNew(old_data, new_data, database_address, database_name):
    """Incremental sync for intel fares, keyed by get_key_intel_new().

    FIX: the duplicate scan previously used get_key_intel() while the diff
    used get_key_intel_new(), so records differing only in their segment
    list were wrongly flagged as duplicates; both now use get_key_intel_new().
    """
    try:
        invalidate_ids = _duplicate_ids(old_data, get_key_intel_new)
        old_map = {get_key_intel_new(item): item for item in old_data}
        new_map = {get_key_intel_new(item): item for item in new_data}
        vanished_ids, _touch_ids, to_insert = _classify_by_md5(old_map, new_map)
        invalidate_ids += vanished_ids
        success_flag = True
        if to_insert:
            success_flag = _insert_docs(database_address, database_name, to_insert)
        if invalidate_ids and success_flag:
            success_flag = _mark_invalid(
                database_address, database_name, invalidate_ids, "updateTime")
        return success_flag
    except Exception as e:
        logger.error(f"【{database_name}】增量保存异常: {str(e)}", exc_info=True)
        return False


def get_key_statistics(item):
    """MD5 fingerprint for statistics records (costPrice, no native_price).

    NOTE(review): defined but the saveStatistics* functions key on get_key()
    instead -- confirm whether they should use this function.
    """
    combined_string = (
        item['policyType'] + item['dep'] + item['arr'] + item['depDate']
        + item['flightNo'] + item['cabin'] + item['costPrice'] + item['seatNum']
    )
    return hashlib.md5(combined_string.encode()).hexdigest()


def _save_statistics(old_data, new_data, database_address, database_name,
                     window_hours):
    """Shared body of saveStatistics/saveStatistics1.

    Inserts records not yet stored; for records present in both sets it
    refreshes ``update_time`` and bumps ``run_count`` on the STORED document.
    FIX: the matched stored documents were previously re-read from the
    already-filtered new list (always empty, and new records carry no _id);
    the old document is now collected instead. Always returns None.
    """
    try:
        need_upd_time_data = []  # stored docs whose timestamp gets refreshed
        for item in old_data:
            item_md5 = get_key(item)
            if any(get_key(ins) == item_md5 for ins in new_data):
                # Drop unchanged records from the insert list, refresh the stored copy.
                new_data = [ins for ins in new_data if get_key(ins) != item_md5]
                need_upd_time_data.append(item)
        if new_data:
            database_address.insert_many(collect_name=database_name, documents=new_data)
            logger.info(f"【插入数据】: 长度{len(new_data)}")
        if need_upd_time_data:
            upd_id_list = [data['_id'] for data in need_upd_time_data]
            # NOTE(review): utcnow()+window makes the $lt filter match almost
            # everything; looks like it was meant to throttle refreshes -- confirm.
            time_limit = datetime.utcnow() + timedelta(hours=window_hours)
            filter_criteria = {
                '_id': {'$in': upd_id_list},
                'update_time': {'$lt': time_limit}
            }
            res = database_address.update_many(
                collect_name=database_name,
                filter=filter_criteria,
                update={
                    "$set": {"update_time": datetime.now()},
                    "$inc": {"run_count": 1}
                })
            logger.info(f"【更新插入时间数据】: 匹配{res.matched_count}条,更新{res.modified_count}条")
    except Exception as e:
        logger.error(f"【保存异常】: {e}")
    return None


def saveStatistics(old_data, new_data, database_address, database_name):
    """Statistics sync with a 3-hour refresh window."""
    return _save_statistics(old_data, new_data, database_address, database_name, 3)


def saveStatistics1(old_data, new_data, database_address, database_name):
    """Statistics sync with a 24-hour refresh window."""
    return _save_statistics(old_data, new_data, database_address, database_name, 24)
最新发布
09-02
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值