column: id (should be mapped with insert="false" update="false")

[size=x-large][color=orange][b]Spring集成Hibernate:column: id (should be mapped with insert="false" update="false")[/b][/color][/size]

[color=red][b]原因:column重复定义[/b][/color]

状况1:*.hbm文件

<property name="begin" column="begin" type="date"/>
<property name="end" column="begin" type="date"/>


状况2:@Entity实体类

@Id
@GeneratedValue(strategy = GenerationType.AUTO)
@Column(name = "id", nullable = false)
private int id;

@Column(name = "id", nullable = true)
private String uuid;
class SYCNMSpider:
    """Parses Sycm (生意参谋) traffic-export workbooks uploaded through a Gradio app.

    NOTE(review): recovered from a collapsed one-line paste; code tokens are
    preserved byte-for-byte, only formatting and comments/docstrings were added.
    Depends on project-local `ParserTools` and a Gradio app object — confirm
    their APIs against the rest of the project.
    """

    def __init__(self, gradio_app, mapping_path):
        self.__gr = gradio_app  # Gradio application object (UI components, Info/Error)
        # New↔old traffic-source mapping table, loaded once from the Excel file.
        self.mapping_flows_data = self.init_mapping_flows(mapping_path)
        self.__paser_tools = ParserTools(gradio_app)  # project helper (save-filename etc.)

    @staticmethod
    def init_mapping_flows(flows_path):
        """Build the new/old traffic-source mapping table from an Excel workbook.

        The workbook is read with a 3-row header; the '新旧版流量对照' super-column
        contains a '新版' (new) and '旧版' (old) 4-level hierarchy per row.

        :param flows_path: path to the mapping workbook.
        :return: list of ``{"new_flows": {...}, "old_flows": {...}}`` dicts, one per row.
        """
        df = pd.read_excel(flows_path, header=[0, 1, 2])
        # Forward-fill NaN cells left behind by merged header cells.
        # NOTE(review): fillna(method='ffill') is deprecated in recent pandas — prefer df.ffill().
        df.fillna(method='ffill', inplace=True)
        new_flows = df['新旧版流量对照']['新版']
        old_flows = df['新旧版流量对照']['旧版']
        mapping_flows = []
        # zip(...) below captured the arrays already, so rebinding the
        # `new_flows`/`old_flows` names inside the loop is safe (though confusing).
        for x, y in zip(new_flows.values, old_flows.values):
            # Each row carries 4 levels: level-1, level-2, level-3 and the leaf name.
            new_flows = {"level_one_flows": x[0], "level_two_flows": x[1],
                         "level_three_flows": x[2], "leaf_flows": x[3]}
            old_flows = {"level_one_flows": y[0], "level_two_flows": y[1],
                         "level_three_flows": y[2], "leaf_flows": y[3]}
            # Append to the mapping table.
            mapping_flows.append({"new_flows": new_flows, "old_flows": old_flows})
        return mapping_flows

    @staticmethod
    def seach_mapping_flows(mapping_flows, res_return=False, leaf_flows_name="", mapping_new_or_old="new_flows"):
        """Search the mapping table by leaf flow name.  ('seach' sic — public name kept.)

        :param mapping_flows: table produced by :meth:`init_mapping_flows`.
        :param res_return: when True, return the *opposite* side (new→old or
            old→new) of each matched entry instead of the searched side.
        :param leaf_flows_name: leaf flow name to match exactly.
        :param mapping_new_or_old: which side to search, "new_flows" or "old_flows".
        :return: matched side-dicts, de-duplicated, in first-seen order.
        :raises ValueError: if ``mapping_new_or_old`` is not a valid side.
        """
        mapping_new_or_old_all = {"new_flows": "old_flows", "old_flows": "new_flows"}
        if mapping_new_or_old not in mapping_new_or_old_all:
            raise ValueError("mapping_new_or_old must be in mapping_new_or_old_all 'new_flows' or 'old_flows'")
        return_flows = mapping_new_or_old
        if res_return:
            # Flip to the opposite side of the mapping.
            return_flows = mapping_new_or_old_all[mapping_new_or_old]
        result = []
        for mapping in mapping_flows:
            if mapping_new_or_old == "new_flows":
                if mapping["new_flows"]["leaf_flows"] == leaf_flows_name:
                    result.append(mapping[return_flows])
            elif mapping_new_or_old == "old_flows":
                if mapping["old_flows"]["leaf_flows"] == leaf_flows_name:
                    result.append(mapping[return_flows])
        # De-duplicate while preserving order.
        seen = set()
        unique_list = []
        for d in result:
            # frozenset turns the dict items into an immutable, hashable set.
            tuple_d = frozenset(d.items())
            if tuple_d not in seen:
                seen.add(tuple_d)
                unique_list.append(d)
        return unique_list

    def update_file_component(self, choice):
        """Return a Gradio File component configured for the chosen upload mode.

        Implicitly returns None for any other ``choice`` value.
        """
        if choice == "多文件模式":
            return self.__gr.File(file_count="multiple", file_types=['.xlsx', '.xls'])
        elif choice == "文件夹模式":
            return self.__gr.File(file_count="directory")

    def select_level(self, x, level="level_one_flows"):
        """Map a leaf flow name ``x`` to one of its ancestor levels (new-version side).

        NOTE(review): assumes at least one match exists — ``result_list[0]``
        raises IndexError for unknown names; confirm upstream guarantees.
        """
        result_list = self.seach_mapping_flows(self.mapping_flows_data, res_return=False,
                                               leaf_flows_name=x, mapping_new_or_old="new_flows")
        return result_list[0][level]

    def start_app(self, input_paths: list):
        """Parse each uploaded workbook, merge all rows and save one Excel file.

        File names are expected as ``<prefix>_<...>-<date parts>`` where the
        prefix is one of the values in ``check_prefix``.

        :param input_paths: list of uploaded file paths.
        :return: path of the written result workbook.
        :raises gr.Error: on bad file-name prefix or any parse failure.
        """
        check_prefix = ["商品", "店铺", "直播", "内容"]
        all_data = []
        for file in input_paths:
            filename = os.path.basename(file)
            try:
                x = filename.split("_")
                y = x[1].split("-")
                prefix_name = x[0]
                if prefix_name not in check_prefix:
                    raise self.__gr.Error(f"文件名错误无法判断来源!请添加前缀(例如:商品_)")
                # Date is everything after the first '-' of the second segment.
                prefix_date = "-".join(y[1::])
                excel_file = pd.ExcelFile(file)
                sheet_name_list = ['无线流量来源', '经营优势来源渠道']
                one_level = '经营优势'
                sheet_names = excel_file.sheet_names
                # ---------------------------------------------------------------------------------------------------------------
                # Read every worksheet (data starts after 5 header rows).
                dfs = {sheet_name: pd.read_excel(excel_file, sheet_name=sheet_name, skiprows=5)
                       for sheet_name in sheet_names}
                flow_df = dfs[sheet_name_list[0]]  # the '无线流量来源' sheet
                # (disabled) drop the '经营优势' first-level source:
                # flow_df = flow_df[flow_df["一级来源"] != one_level]
                flow_df.insert(flow_df.shape[1], '日期', prefix_date)
                if len(sheet_names) > 1:
                    # Two sheets present → "non 全站推" workbook variant.
                    management_df = dfs[sheet_name_list[1]]  # the '经营优势来源渠道' sheet
                    # Add new columns so both frames share a schema before concat.
                    management_df.loc[:, '日期'] = prefix_date
                    management_df.loc[:, '来源'] = prefix_name + "-非全站推"
                    management_df.loc[:, '一级来源'] = one_level
                    management_df.loc[:, '二级来源'] = one_level
                    management_df['三级来源'] = management_df['来源名称']
                    flow_df.insert(flow_df.shape[1], '来源', prefix_name + "-非全站推")
                    # Drop the now-duplicated '来源名称' column.
                    management_df.drop(columns=['来源名称'], inplace=True)
                    concat_result = pd.concat([flow_df, management_df], axis=0, ignore_index=True)
                else:
                    # Single sheet → "全站推" workbook variant.
                    flow_df.insert(flow_df.shape[1], '来源', prefix_name + "-全站推")
                    flow_df_new = flow_df[flow_df["二级来源"] != "汇总"]
                    # '汇总' rows need their levels re-derived via the mapping table.
                    # NOTE(review): assignments on this filtered frame can trigger
                    # pandas SettingWithCopyWarning — consider .copy() upstream.
                    need_parser_df = flow_df[flow_df["二级来源"] == "汇总"]
                    need_parser_df['三级来源'] = need_parser_df['一级来源']
                    need_parser_df['一级来源'] = need_parser_df['三级来源'].apply(lambda x: self.select_level(x))
                    need_parser_df['二级来源'] = need_parser_df['三级来源'].apply(
                        lambda x: self.select_level(x, level="level_two_flows"))
                    concat_result = pd.concat([flow_df_new, need_parser_df], axis=0, ignore_index=True)
                # ---------------------------------------------------------------------------------------------------------------
                # Move the '日期'/'来源' columns to the front.
                cols = ['日期', '来源'] + [col for col in concat_result.columns if col != '日期' and col != '来源']
                all_data.append(concat_result[cols])
                # NOTE(review): the literal '(unknown)' below looks like a redacted
                # filename placeholder in the recovered source — confirm intent.
                self.__gr.Info(f"解析成功文件名:(unknown)")
            except self.__gr.exceptions.Error:
                # Re-raise Gradio errors with the prefix-hint message.
                raise self.__gr.Error(f"文件名错误无法判断来源!请添加前缀(例如:商品_)")
            except Exception as e:
                # NOTE(review): `e` is unused; original error detail is swallowed here.
                raise self.__gr.Error(f"解析失败,文件名:(unknown)")
        filename = self.__paser_tools.get_save_filename("生意参谋_流量", "sycm_flows")
        data = pd.concat(all_data, axis=0, ignore_index=True)
        data.to_excel(filename, index=False, sheet_name="流量来源")
        return filename


class SYCMDownload:
    """Downloads Sycm traffic reports via a crawler and post-processes them.

    NOTE(review): `init_mapping_flows`/`seach_mapping_flows`/`select_level` are
    duplicated from SYCNMSpider above — candidates for a shared helper.
    """

    def __init__(self, gradio_app, mapping_path):
        self.__gr = gradio_app  # Gradio application object
        self.__paser_tools = ParserTools(gradio_app)  # project helper (cookies, filenames)
        self.__downloader = app_sycm_download.SYCMDownloadSpider()  # project crawler
        # UI label → API dateType value.
        self.__date_map = {"日": "day", "月": "month", "周": "week", "近7天": "recent7", "近30天": "recent30"}
        self.mapping_flows_data = self.init_mapping_flows(mapping_path)

    def start_app(self, cookies, output_type, flows_dim: list, date_dim, start_date, end_date):
        """Crawl one report per date range, filter it and write Excel output.

        :param cookies: raw cookie string; parsed by ParserTools.parse_cookie.
        :param output_type: "多个文件" writes one file per range, else one merged file.
        :param flows_dim: traffic-carrier dimensions to keep (suffixed with '流量').
        :param date_dim: UI date granularity key of ``self.__date_map``.
        :param start_date: "YYYY-MM-DD" start (only for 日/周/月).
        :param end_date: "YYYY-MM-DD" end (only for 日/周/月).
        :return: list of output paths (or the per-range path list).
        :raises gr.Error: when the crawler reports failure.
        """
        all_data = []
        all_path = []
        logging.info(
            f" output_type: {output_type} flows_dim: {flows_dim} date_dim: {date_dim} start_date: {start_date} end_date: {end_date}")
        # Resolve the API date type from the UI label.
        dateType = self.__date_map[date_dim]
        # Parse the raw cookie string.
        cookies = self.__paser_tools.parse_cookie(cookies)
        # Build the list of "start|end" date ranges.
        if date_dim in ["日", "周", "月"]:
            dateRanges = self.paser_date(dateType, start_date, end_date)
        else:
            # recent7/recent30 expand into per-day ranges, crawled as dateType "day".
            dateRanges = self.paser_date(dateType)
            dateType = "day"
        logging.info(f"dateRanges: {dateRanges}")
        progress = self.__gr.Progress(track_tqdm=True)
        for dateRange in progress.tqdm(dateRanges, desc="下载进度", unit="number"):
            logging.info(f"dateType: {dateType} dateRange: {dateRange}")
            status, data = self.__downloader.crawl(cookies=cookies, dateType=dateType, dateRange=dateRange)
            if not status:
                raise self.__gr.Error("采集失败!")
            # The crawler returns raw xlsx bytes; `data` is rebound to the parsed frame below.
            byte_stream = io.BytesIO(data)
            excel_file = pd.ExcelFile(byte_stream)
            data = self.paser_flows_dim(excel_file, flows_dim, date_dim, dateRange)
            all_data.append(data)
            if output_type == "多个文件":
                name = "~".join(dateRange.split("|"))
                filename_m = self.__paser_tools.get_save_filename(f"生意参谋_流量_{name}", "sycm_flows_download")
                data.to_excel(filename_m, index=False, sheet_name="流量来源")
                all_path.append(filename_m)
        if output_type == "多个文件":
            return all_path
        filename = self.__paser_tools.get_save_filename("生意参谋_流量_下载", "sycm_flows_download")
        result = pd.concat(all_data, axis=0, ignore_index=True)
        result.to_excel(filename, index=False, sheet_name="流量来源")
        # NOTE(review): __state is written here but never initialized/read in this
        # view of the file — confirm it is used elsewhere.
        self.__state = False
        return [filename]

    def paser_flows_dim(self, excel_file_data, flows_dim, date_dim, dateRange):
        """Normalize one downloaded workbook and filter it by traffic carrier.

        :param excel_file_data: pd.ExcelFile of the downloaded report.
        :param flows_dim: carrier dimensions to keep (each suffixed with '流量').
        :param date_dim: UI date granularity ("周"/"月" use the full range as the date).
        :param dateRange: "start|end" string for this report.
        :return: filtered DataFrame with '日期'/'流量载体' leading columns.
        """
        # NOTE(review): `df` is computed but never used below.
        df = pd.read_excel(excel_file_data, skiprows=5)
        sheet_name_list = ['店铺渠道', '经营优势来源渠道']
        # NOTE(review): `promotion` is unused.
        promotion = ["整体", "非全站推广期", "全站推广期"]
        one_level = '经营优势'
        sheet_names = excel_file_data.sheet_names
        # Read every worksheet (data starts after 5 header rows).
        dfs = {sheet_name: pd.read_excel(excel_file_data, sheet_name=sheet_name, skiprows=5)
               for sheet_name in sheet_names}
        # Date label: single day, or "start~end" for week/month.
        prefix_date = dateRange.split("|")[0]
        if date_dim == "周" or date_dim == "月":
            x_1 = dateRange.split("|")
            prefix_date = "~".join(x_1)
        flow_df = dfs[sheet_name_list[0]]
        flow_df.insert(flow_df.shape[1], '日期', prefix_date)
        if "全站推广期" in list(flow_df.columns):
            flow_df_new = flow_df[(flow_df["二级来源"] != "汇总") & (flow_df["全站推广期"] == "全站推广期")]
            flow_df_not = flow_df[flow_df["全站推广期"] != "全站推广期"]
            # '汇总' rows in the 全站推广期 slice need their levels re-derived.
            # NOTE(review): assignments on a filtered frame may raise
            # SettingWithCopyWarning — consider .copy().
            need_parser_df = flow_df[(flow_df["二级来源"] == "汇总") & (flow_df["全站推广期"] == "全站推广期")]
            need_parser_df['三级来源'] = need_parser_df['一级来源']
            need_parser_df['一级来源'] = need_parser_df['三级来源'].apply(lambda x: self.select_level(x))
            need_parser_df['二级来源'] = need_parser_df['三级来源'].apply(
                lambda x: self.select_level(x, level="level_two_flows"))
            result_data = pd.concat([flow_df_new, need_parser_df, flow_df_not], axis=0, ignore_index=True)
        else:
            flow_df.loc[:, '全站推广期'] = "整体"
            result_data = flow_df
        if len(sheet_names) > 1:
            # Merge in the '经营优势' sheet under a unified schema.
            management_df = dfs[sheet_name_list[1]]
            management_df.loc[:, '全站推广期'] = "整体"
            management_df.loc[:, '日期'] = prefix_date
            management_df.loc[:, '一级来源'] = one_level
            management_df.loc[:, '二级来源'] = one_level
            management_df['三级来源'] = management_df['来源名称']
            management_df.drop(columns=['来源名称'], inplace=True)
            result = pd.concat([result_data, management_df], axis=0, ignore_index=True)
        else:
            result = result_data
        # Reorder columns: '日期' and '流量载体' first.
        cols = ['日期', '流量载体'] + [col for col in result.columns if col != '日期' and col != '流量载体']
        new_flow_dim = [dim + "流量" for dim in flows_dim]
        filter_cols = result[cols]
        return filter_cols[filter_cols['流量载体'].isin(new_flow_dim)]

    def select_level(self, x, level="level_one_flows"):
        """Map a leaf flow name ``x`` to an ancestor level (new-version side).

        NOTE(review): ``result_list[0]`` raises IndexError for unknown names.
        """
        result_list = self.seach_mapping_flows(self.mapping_flows_data, res_return=False,
                                               leaf_flows_name=x, mapping_new_or_old="new_flows")
        return result_list[0][level]

    @staticmethod
    def paser_date(time_type, start_date=None, end_date=None):
        """Expand a date span into a list of "start|end" range strings.

        day → one range per day; week → Monday–Sunday ranges; month → calendar
        months; recent7/recent30 → per-day ranges ending yesterday.

        :param time_type: "day", "week", "month", "recent7" or "recent30".
        :param start_date: "YYYY-MM-DD", required for day/week/month only.
        :param end_date: "YYYY-MM-DD", required for day/week/month only.
        :raises ValueError: on invalid type, missing/extra dates, future end
            date, or week/month boundary violations.
        """
        today = datetime.datetime.today()
        # Latest allowed date is yesterday (reports lag by one day).
        end_day = today - datetime.timedelta(days=1)
        date_format = "%Y-%m-%d"

        def parse_date(date_str):
            # Parse "YYYY-MM-DD" into a datetime.
            return datetime.datetime.strptime(date_str, date_format)

        if time_type in ["day", "week", "month"]:
            if not start_date or not end_date:
                raise ValueError("Error: Start date and end date must be provided for day, week, and month types.")
            start_dt = parse_date(start_date)
            end_dt = parse_date(end_date)
            if end_dt > end_day:
                raise ValueError("Error: End date cannot be later than the day before the current date.")
            if time_type == "week":
                # Weeks must be full Monday–Sunday spans.
                if start_dt.weekday() != 0 or end_dt.weekday() != 6:
                    raise ValueError("Error: For 'week' type, start date must be Monday and end date must be Sunday.")
            elif time_type == "month":
                # Months must be full first-day–last-day spans.
                if start_dt.day != 1 or (end_dt + datetime.timedelta(days=1)).day != 1:
                    raise ValueError(
                        "Error: For 'month' type, start date must be the first day of the month and end date must be the last day of the month.")
        elif time_type in ["recent7", "recent30"]:
            if start_date or end_date:
                raise ValueError(
                    "Error: Start date and end date should not be provided for last_7_days and last_30_days types.")
            start_dt = end_day - datetime.timedelta(days=6 if time_type == "recent7" else 29)
            end_dt = end_day
        else:
            raise ValueError("Error: Invalid type. Allowed values are: day, week, month, recent7, recent30.")
        result = []
        if time_type == "day":
            # One "d|d" range per day.
            curr_date = start_dt
            while curr_date <= end_dt:
                date_str = curr_date.strftime(date_format)
                result.append(f"{date_str}|{date_str}")
                curr_date += datetime.timedelta(days=1)
        elif time_type == "week":
            # One Monday–Sunday range per week, clamped to end_dt.
            curr_date = start_dt
            while curr_date <= end_dt:
                start_of_week = curr_date - datetime.timedelta(days=curr_date.weekday())
                end_of_week = start_of_week + datetime.timedelta(days=6)
                if end_of_week > end_dt:
                    end_of_week = end_dt
                result.append(f"{start_of_week.strftime(date_format)}|{end_of_week.strftime(date_format)}")
                curr_date = end_of_week + datetime.timedelta(days=1)
        elif time_type == "month":
            # One calendar-month range per month, clamped to end_dt.
            curr_date = start_dt
            while curr_date <= end_dt:
                start_of_month = curr_date.replace(day=1)
                # +32 days always lands in the next month; snap back to its 1st.
                next_month_start = (start_of_month + datetime.timedelta(days=32)).replace(day=1)
                end_of_month = next_month_start - datetime.timedelta(days=1)
                if end_of_month > end_dt:
                    end_of_month = end_dt
                result.append(f"{start_of_month.strftime(date_format)}|{end_of_month.strftime(date_format)}")
                curr_date = next_month_start
        elif time_type == "recent7" or time_type == "recent30":
            # Same per-day expansion as "day".
            curr_date = start_dt
            while curr_date <= end_dt:
                date_str = curr_date.strftime(date_format)
                result.append(f"{date_str}|{date_str}")
                curr_date += datetime.timedelta(days=1)
        return result

    @staticmethod
    def init_mapping_flows(flows_path):
        """Build the new/old traffic-source mapping table from an Excel workbook.

        NOTE(review): duplicate of SYCNMSpider.init_mapping_flows.

        :param flows_path: path to the mapping workbook.
        :return: list of ``{"new_flows": {...}, "old_flows": {...}}`` dicts.
        """
        df = pd.read_excel(flows_path, header=[0, 1, 2])
        # Forward-fill NaN cells left behind by merged header cells.
        # NOTE(review): fillna(method='ffill') is deprecated in recent pandas.
        df.fillna(method='ffill', inplace=True)
        new_flows = df['新旧版流量对照']['新版']
        old_flows = df['新旧版流量对照']['旧版']
        mapping_flows = []
        for x, y in zip(new_flows.values, old_flows.values):
            # Each row carries 4 levels: level-1, level-2, level-3 and the leaf name.
            new_flows = {"level_one_flows": x[0], "level_two_flows": x[1],
                         "level_three_flows": x[2], "leaf_flows": x[3]}
            old_flows = {"level_one_flows": y[0], "level_two_flows": y[1],
                         "level_three_flows": y[2], "leaf_flows": y[3]}
            # Append to the mapping table.
            mapping_flows.append({"new_flows": new_flows, "old_flows": old_flows})
        return mapping_flows

    @staticmethod
    def seach_mapping_flows(mapping_flows, res_return=False, leaf_flows_name="", mapping_new_or_old="new_flows"):
        """Search the mapping table by leaf flow name.  ('seach' sic — public name kept.)

        NOTE(review): duplicate of SYCNMSpider.seach_mapping_flows.

        :param mapping_flows: table produced by :meth:`init_mapping_flows`.
        :param res_return: when True, return the opposite side of each match.
        :param leaf_flows_name: leaf flow name to match exactly.
        :param mapping_new_or_old: which side to search, "new_flows" or "old_flows".
        :return: matched side-dicts, de-duplicated, in first-seen order.
        :raises ValueError: if ``mapping_new_or_old`` is not a valid side.
        """
        mapping_new_or_old_all = {"new_flows": "old_flows", "old_flows": "new_flows"}
        if mapping_new_or_old not in mapping_new_or_old_all:
            raise ValueError("mapping_new_or_old must be in mapping_new_or_old_all 'new_flows' or 'old_flows'")
        return_flows = mapping_new_or_old
        if res_return:
            # Flip to the opposite side of the mapping.
            return_flows = mapping_new_or_old_all[mapping_new_or_old]
        result = []
        for mapping in mapping_flows:
            if mapping_new_or_old == "new_flows":
                if mapping["new_flows"]["leaf_flows"] == leaf_flows_name:
                    result.append(mapping[return_flows])
            elif mapping_new_or_old == "old_flows":
                if mapping["old_flows"]["leaf_flows"] == leaf_flows_name:
                    result.append(mapping[return_flows])
        # De-duplicate while preserving order.
        seen = set()
        unique_list = []
        for d in result:
            # frozenset turns the dict items into an immutable, hashable set.
            tuple_d = frozenset(d.items())
            if tuple_d not in seen:
                seen.add(tuple_d)
                unique_list.append(d)
        return unique_list

    @staticmethod
    def day_yesterday():
        """Return yesterday's date as a "YYYY-MM-DD" string."""
        # Current date and time.
        now = datetime.datetime.now()
        # Step back one day.
        day_yesterday = now - datetime.timedelta(days=1)
        yesterday = day_yesterday.strftime("%Y-%m-%d")
        return yesterday

    def max_day_check(self, input_date_str):
        """Clamp a date picker value to yesterday at most.

        :param input_date_str: "YYYY-MM-DD" value from the UI.
        :return: a Gradio DateTime component with the (possibly clamped) value.
        """
        input_date = datetime.datetime.strptime(input_date_str, "%Y-%m-%d")
        # Yesterday is the latest selectable date.
        yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
        # Replace future dates with yesterday; otherwise keep the input.
        if input_date > yesterday:
            return self.__gr.DateTime(include_time=False, min_width=10, type='string', value=self.day_yesterday())
        else:
            return self.__gr.DateTime(include_time=False, min_width=10, type='string', value=input_date_str)
08-15
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值