奇怪的convert.todatetime

本文记录了一个奇怪的现象:在相同的服务器配置下(两台WEB服务器负载平衡),同样的数据偶尔能成功转换为日期型,偶尔又不能。该问题并非由数据或程序引起,期待高手解答。

今天上午遇到一个奇怪的问题

 

服务器的配置:WEB程序是一样的,WEB服务器是两台,做负载平衡。

同样的数据,强制转换为日期型时,一会儿能转换成功,一会儿又转换不成功。

 

如果是数据或程序的问题,那就应该都转换成功或者都转换不成功。

此问题比较奇怪,记录下来,期待高手解决。

import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from prophet import Prophet
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error


def print_progress(step, message, indent=0):
    """Print one timestamped progress line.

    Args:
        step: Step label shown after "步骤" (the script passes numbers such as 1 or 1.1).
        message: Text describing the step.
        indent: Number of spaces inserted before the step label.
    """
    indent_str = " " * indent
    timestamp = time.strftime("%H:%M:%S", time.localtime())
    print(f"[{timestamp}] {indent_str}步骤 {step}: {message}")


# Plot styling. Matplotlib 3.6 deprecated the bare 'seaborn-*' style names in
# favour of 'seaborn-v0_8-*', so pick whichever name this installation ships
# and fall back to seaborn's own API when neither is available.
_WHITEGRID_STYLE = next(
    (
        style_name
        for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
        if style_name in plt.style.available
    ),
    None,
)
if _WHITEGRID_STYLE is not None:
    plt.style.use(_WHITEGRID_STYLE)
else:
    sns.set_style('whitegrid')
sns.set_palette("Set2")


# 1. Data preprocessing
def preprocess_data(
    items_path=r"D:\虚拟c盘\大学\竞赛\数学建模\2023年C题\C题\附件1.xlsx",
    sales_path=r"D:\虚拟c盘\大学\竞赛\数学建模\2023年C题\C题\附件2.xlsx",
):
    """Load the item and sales workbooks, join them and sign the return rows.

    Args:
        items_path: Excel file with the item master data (joined on '单品编码').
        sales_path: Excel file with the sales records.

    Returns:
        A tuple ``(df_merged, df_items)``: the sales records left-joined with
        the item data and extended with a '金额' column, and the item table
        as read from ``items_path``.
    """
    print_progress(1, "开始数据预处理")
    print_progress(1.1, "读取附件1和附件2数据", indent=1)

    df_items = pd.read_excel(items_path, engine='openpyxl')
    df_sales = pd.read_excel(sales_path, engine='openpyxl')

    print_progress(
        1.2,
        f"合并数据集: 单品编码={len(df_items)}条, 销售记录={len(df_sales)}条",
        indent=1,
    )
    # Left join keeps every sales record even when its item code is unknown.
    df_merged = pd.merge(df_sales, df_items, on='单品编码', how='left')

    print_progress(1.3, "处理退货数据", indent=1)
    df_merged['金额'] = df_merged['销量(千克)'] * df_merged['销售单价(元/千克)']
    is_return = df_merged['销售类型'] == '退货'
    # NOTE(review): this assumes return rows are stored with positive
    # quantities. If the workbook already records them as negative, this
    # flips them back to positive -- confirm against the raw data.
    df_merged.loc[is_return, ['金额', '销量(千克)']] *= -1

    print_progress(1, f"数据预处理完成, 共处理{len(df_merged)}条记录")
    return df_merged, df_items


# 2.
# Pivot-table construction
def create_pivot_tables(df_merged, df_items):
    """Build per-item daily pivot tables for price, volume and sales amount.

    Args:
        df_merged: Sales records joined with item data; must contain the
            columns '单品编码', '单品名称', '销售日期', '销售类型',
            '销售单价(元/千克)', '销量(千克)' and '金额'.
        df_items: Item table with '单品编码', '单品名称' and '分类名称'.

    Returns:
        A dict with the keys 'price', 'vol' and 'sales'. Each value is a
        DataFrame with one row per item (item columns plus '退货率') followed
        by one column per sales date.
    """
    print_progress(2, "开始创建透视表")
    print_progress(2.1, "计算每日指标", indent=1)

    # One groupby pass produces all three daily metrics.
    daily_metrics = (
        df_merged.groupby(['单品编码', '单品名称', '销售日期'])
        .agg(
            平均价格=('销售单价(元/千克)', 'mean'),
            总销量=('销量(千克)', 'sum'),
            总金额=('金额', 'sum'),
        )
        .reset_index()
    )

    print_progress(2.2, "计算退货率", indent=1)
    sales_mask = df_merged['销售类型'] == '销售'
    return_mask = df_merged['销售类型'] == '退货'
    total_sales = df_merged[sales_mask].groupby('单品编码')['金额'].sum()
    total_returns = df_merged[return_mask].groupby('单品编码')['金额'].sum().abs()
    # Align the returns on the items that have sales so that an item without
    # any return gets a rate of 0 instead of NaN.
    return_rate = (
        total_returns.reindex(total_sales.index, fill_value=0) / total_sales
    ).reset_index(name='退货率')

    print_progress(2.3, "创建透视表", indent=1)
    # The item/return-rate frame is identical for every metric; build it once.
    items_with_rate = df_items[['单品编码', '单品名称', '分类名称']].merge(
        return_rate, on='单品编码', how='left'
    )

    def create_pivot(metric):
        """Pivot one daily metric to item rows x date columns and attach item info."""
        pivot_df = daily_metrics.pivot_table(
            index=['单品编码', '单品名称'],
            columns='销售日期',
            values=metric,
            fill_value=0,
        ).reset_index()
        return pd.merge(
            items_with_rate,
            pivot_df,
            on=['单品编码', '单品名称'],
            how='left',
        )

    pivots = {
        'price': create_pivot('平均价格'),
        'vol': create_pivot('总销量'),
        'sales': create_pivot('总金额'),
    }
    # Report completion only after the pivots have actually been built.
    print_progress(2, "透视表创建完成")
    return pivots


# 3. Persist results
def save_results(final_dfs):
    """Write every DataFrame in ``final_dfs`` to its own Excel workbook.

    Args:
        final_dfs: Mapping of a short name to the DataFrame to save; the name
            becomes part of the output file name.
    """
    print_progress(3, "开始保存结果")
    for name, df in final_dfs.items():
        file_path = f'D:\\虚拟c盘\\大学\\竞赛\\数学建模\\2023年C题\\C235问题二output_{name}.xlsx'
        print_progress(3.1, f"保存 {name} 数据到: {file_path}", indent=1)
        df.to_excel(file_path, index=False, engine='openpyxl')
    print_progress(3, "结果保存完成")


# 4. Model evaluation
def evaluate_prophet_performance(y_true, y_pred):
    """Return R², MAE and RMSE of ``y_pred`` against ``y_true``.

    Args:
        y_true: Observed values.
        y_pred: Predicted values, compared with ``y_true`` position by position.

    Returns:
        A dict with the keys 'R²', 'MAE' and 'RMSE'.
    """
    return {
        'R²': r2_score(y_true, y_pred),
        'MAE': mean_absolute_error(y_true, y_pred),
        'RMSE': np.sqrt(mean_squared_error(y_true, y_pred)),
    }


# 5.
优化Prophet分析流程并添加可视化 def prophet_analysis_with_visualization(target_category='花叶类'): print_progress(4, "开始Prophet分析") print_progress(4.1, "读取预处理数据", indent=1) # 读取数据 sales_df = pd.read_excel( r"D:\虚拟c盘\大学\竞赛\数学建模\2023年C题\C题\结果\品类级销售数据.xlsx" ).rename(columns={'销售日期': 'ds'}) price_df = pd.read_excel( r"D:\虚拟c盘\大学\竞赛\数学建模\2023年C题\C题\结果\单品级销售数据.xlsx" ).rename(columns={'销售日期': 'ds'}) # 结果存储 results = [] categories = sales_df.columns[1:] print_progress(4.2, f"开始分析 {len(categories)} 个品类", indent=1) # 分析每个品类 for i, col in enumerate(categories): start_time = time.time() print_progress(4.2, f"分析品类 {i+1}/{len(categories)}: {col}", indent=2) # 准备数据 df = sales_df[['ds', col]].rename(columns={col: 'y'}).query('y > 0') merged_df = pd.merge(df, price_df, on='ds') print_progress(4.3, f"数据准备完成: {len(merged_df)}条记录", indent=3) # 初始化模型 model = Prophet( yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=False, changepoint_prior_scale=0.05 ) model.add_country_holidays(country_name='CN') # 添加回归量(如果存在) if col in price_df.columns: print_progress(4.4, f"添加回归量: {col}", indent=3) model.add_regressor(col) # 训练模型 print_progress(4.5, "训练模型中...", indent=3) model.fit(merged_df) # 预测 print_progress(4.6, "生成预测", indent=3) future = model.make_future_dataframe(periods=0) future = pd.merge(future, price_df, on='ds') forecast = model.predict(future) # 评估模型 metrics = evaluate_prophet_performance(merged_df['y'], forecast['yhat']) results.append({ '品类名称': col, **metrics }) elapsed = time.time() - start_time print_progress(4.2, f"完成分析 {col}: R²={metrics['R²']:.3f}, 用时 {elapsed:.1f}秒", indent=2) # 如果是目标品类,生成详细可视化 if col == target_category: print_progress(4.7, f"为 {target_category} 生成可视化图表", indent=2) # 1. 
整体拟合结果图 plt.figure(figsize=(14, 8)) plt.plot(merged_df['ds'], merged_df['y'], 'b.', alpha=0.5, label='实际值') plt.plot(forecast['ds'], forecast['yhat'], 'r-', linewidth=2, label='预测值') plt.fill_between( forecast['ds'], forecast['yhat_lower'], forecast['yhat_upper'], color='r', alpha=0.1 ) plt.title(f'{target_category}销售总量拟合结果', fontsize=16) plt.xlabel('日期', fontsize=12) plt.ylabel('销售量', fontsize=12) plt.legend() plt.grid(True, linestyle='--', alpha=0.7) plt.tight_layout() plot_path = f'{target_category}_销售总量拟合结果.png' plt.savefig(plot_path, dpi=300) plt.close() print_progress(4.7, f"保存拟合结果图: {plot_path}", indent=3) # 2. 各成分分解图 components = ['trend', 'holidays', 'weekly', 'yearly'] if col in price_df.columns: components.append(col) # 添加外生变量 fig, axes = plt.subplots(len(components), 1, figsize=(14, 10)) fig.suptitle(f'{target_category}销售总量各成分分解', fontsize=16) for i, comp in enumerate(components): ax = axes[i] if comp == 'trend': ax.plot(forecast['ds'], forecast[comp], 'g-', linewidth=2) ax.set_title('趋势项', fontsize=12) elif comp == 'holidays': holiday_vals = forecast[comp].dropna() if not holiday_vals.empty: ax.bar(holiday_vals.index, holiday_vals, color='orange') ax.set_title('节假日项', fontsize=12) else: ax.text(0.5, 0.5, '无节假日数据', ha='center', va='center', fontsize=12) elif comp == 'weekly': # 获取一周内的模式 weekly = forecast[['ds', 'weekly']].copy() weekly['day_of_week'] = weekly['ds'].dt.day_name() weekly_avg = weekly.groupby('day_of_week')['weekly'].mean() # 按星期顺序排序 days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'] weekly_avg = weekly_avg.reindex(days) weekly_avg.plot(kind='bar', ax=ax, color='purple') ax.set_title('周度周期项', fontsize=12) ax.set_xticklabels(['周一', '周二', '周三', '周四', '周五', '周六', '周日']) elif comp == 'yearly': # 获取一年内的模式 yearly = forecast[['ds', 'yearly']].copy() yearly['day_of_year'] = yearly['ds'].dt.dayofyear yearly_avg = yearly.groupby('day_of_year')['yearly'].mean() yearly_avg.plot(ax=ax, color='brown') 
ax.set_title('年度周期项', fontsize=12) else: # 外生变量 ax.plot(forecast['ds'], forecast[comp], 'm-', linewidth=2) ax.set_title(f'外生变量项: {comp}', fontsize=12) ax.grid(True, linestyle='--', alpha=0.5) plt.tight_layout(rect=[0, 0, 1, 0.96]) # 为标题留出空间 plot_path = f'{target_category}_各成分分解.png' plt.savefig(plot_path, dpi=300) plt.close() print_progress(4.7, f"保存成分分解图: {plot_path}", indent=3) # 3. 价格与销售量关系图 plt.figure(figsize=(10, 6)) plt.scatter( merged_df[col], merged_df['y'], alpha=0.6, c=pd.to_datetime(merged_df['ds']).astype(int), cmap='viridis' ) plt.colorbar(label='日期') plt.title(f'{target_category}价格与销售量关系', fontsize=14) plt.xlabel('价格 (元/千克)', fontsize=12) plt.ylabel('销售量 (千克)', fontsize=12) plt.grid(True, linestyle='--', alpha=0.5) plt.tight_layout() plot_path = f'{target_category}_价格与销售量关系.png' plt.savefig(plot_path, dpi=300) plt.close() print_progress(4.7, f"保存价格-销售量关系图: {plot_path}", indent=3) print_progress(4, "Prophet分析完成") return pd.DataFrame(results).set_index('品类名称') # 主流程 def main(): print("=" * 70) print("开始执行蔬菜销售预测分析程序") print("=" * 70) # 数据预处理 df_merged, df_items = preprocess_data() # 创建透视表 final_dfs = create_pivot_tables(df_merged, df_items) # 保存结果 save_results(final_dfs) # Prophet分析与可视化 print_progress(5, "开始Prophet时间序列分析") prophet_results = prophet_analysis_with_visualization(target_category='花叶类') # 输出结果 print("\n" + "=" * 70) print("表3 各品类蔬菜的 Prophet 模型拟合水平") print("=" * 70) print(prophet_results.round(2)) print("\n" + "=" * 70) print("程序执行完成!所有分析结果已保存") print("=" * 70) if __name__ == "__main__": main()这段代码运行后出现如下报错:Traceback (most recent call last): File "pandas\_libs\tslibs\parsing.pyx", line 679, in pandas._libs.tslibs.parsing.dateutil_parse ValueError: day is out of range for month The above exception was the direct cause of the following exception: Traceback (most recent call last): File "d:/虚拟c盘/大学/竞赛/数学建模/2023年C题/C235问题二prophet.py", line 300, in <module> main() File "d:/虚拟c盘/大学/竞赛/数学建模/2023年C题/C235问题二prophet.py", line 286, in main 
prophet_results = prophet_analysis_with_visualization(target_category='花叶类') File "d:/虚拟c盘/大学/竞赛/数学建模/2023年C题/C235问题二prophet.py", line 131, in prophet_analysis_with_visualization df = sales_df[['ds', col]].rename(columns={col: 'y'}).query('y > 0') File "D:\lib\site-packages\pandas\core\frame.py", line 4440, in query res = self.eval(expr, **kwargs) File "D:\lib\site-packages\pandas\core\frame.py", line 4566, in eval return _eval(expr, inplace=inplace, **kwargs) File "D:\lib\site-packages\pandas\core\computation\eval.py", line 336, in eval parsed_expr = Expr(expr, engine=engine, parser=parser, env=env) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 809, in __init__ self.terms = self.parse() File "D:\lib\site-packages\pandas\core\computation\expr.py", line 828, in parse return self._visitor.visit(self.expr) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 415, in visit return visitor(node, **kwargs) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 421, in visit_Module return self.visit(expr, **kwargs) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 415, in visit return visitor(node, **kwargs) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 424, in visit_Expr return self.visit(node.value, **kwargs) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 415, in visit return visitor(node, **kwargs) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 719, in visit_Compare return self.visit(binop) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 415, in visit return visitor(node, **kwargs) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 537, in visit_BinOp return self._maybe_evaluate_binop(op, op_class, left, right) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 504, in _maybe_evaluate_binop res = op(lhs, rhs) File "D:\lib\site-packages\pandas\core\computation\ops.py", line 380, in __init__ 
self.convert_values() File "D:\lib\site-packages\pandas\core\computation\ops.py", line 478, in convert_values v = Timestamp(ensure_decoded(v)) File "pandas\_libs\tslibs\timestamps.pyx", line 1667, in pandas._libs.tslibs.timestamps.Timestamp.__new__ File "pandas\_libs\tslibs\conversion.pyx", line 280, in pandas._libs.tslibs.conversion.convert_to_tsobject File "pandas\_libs\tslibs\conversion.pyx", line 557, in pandas._libs.tslibs.conversion.convert_str_to_tsobject File "pandas\_libs\tslibs\parsing.pyx", line 329, in pandas._libs.tslibs.parsing.parse_datetime_string File "pandas\_libs\tslibs\parsing.pyx", line 683, in pandas._libs.tslibs.parsing.dateutil_parse pandas._libs.tslibs.parsing.DateParseError: day is out of range for month: 0
08-07
Traceback (most recent call last): File "pandas\_libs\tslibs\parsing.pyx", line 679, in pandas._libs.tslibs.parsing.dateutil_parse ValueError: day is out of range for month The above exception was the direct cause of the following exception: Traceback (most recent call last): File "d:/虚拟c盘/大学/竞赛/数学建模/2023年C题/C235问题二prophet.py", line 300, in <module> main() File "d:/虚拟c盘/大学/竞赛/数学建模/2023年C题/C235问题二prophet.py", line 286, in main prophet_results = prophet_analysis_with_visualization(target_category='花叶类') File "d:/虚拟c盘/大学/竞赛/数学建模/2023年C题/C235问题二prophet.py", line 131, in prophet_analysis_with_visualization df = sales_df[['ds', col]].rename(columns={col: 'y'}).query('y > 0') File "D:\lib\site-packages\pandas\core\frame.py", line 4440, in query res = self.eval(expr, **kwargs) File "D:\lib\site-packages\pandas\core\frame.py", line 4566, in eval return _eval(expr, inplace=inplace, **kwargs) File "D:\lib\site-packages\pandas\core\computation\eval.py", line 336, in eval parsed_expr = Expr(expr, engine=engine, parser=parser, env=env) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 809, in __init__ self.terms = self.parse() File "D:\lib\site-packages\pandas\core\computation\expr.py", line 828, in parse return self._visitor.visit(self.expr) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 415, in visit return visitor(node, **kwargs) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 421, in visit_Module return self.visit(expr, **kwargs) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 415, in visit return visitor(node, **kwargs) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 424, in visit_Expr return self.visit(node.value, **kwargs) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 415, in visit return visitor(node, **kwargs) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 719, in visit_Compare return self.visit(binop) File 
"D:\lib\site-packages\pandas\core\computation\expr.py", line 415, in visit return visitor(node, **kwargs) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 537, in visit_BinOp v = Timestamp(ensure_decoded(v)) File "pandas\_libs\tslibs\timestamps.pyx", line 1667, in pandas._libs.tslibs.timestamps.Timestamp.__new__ File "pandas\_libs\tslibs\conversion.pyx", line 280, in pandas._libs.tslibs.conversion.convert_to_tsobject File "pandas\_libs\tslibs\conversion.pyx", line 557, in pandas._libs.tslibs.conversion.convert_str_to_tsobject File "pandas\_libs\tslibs\parsing.pyx", line 329, in pandas._libs.tslibs.parsing.parse_datetime_string File "pandas\_libs\tslibs\parsing.pyx", line 683, in pandas._libs.tslibs.parsing.dateutil_parse pandas._libs.tslibs.parsing.DateParseError: day is out of range for month: 0 PS C:\Users\青云\AppData\Local\Programs\Microsoft VS Code> & D:\python.exe d:/虚拟c盘/大学/竞赛/数学建模/2023年C题/C235问题二prophet.py d:/虚拟c盘/大学/竞赛/数学建模/2023年C题/C235问题二prophet.py:17: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. 
plt.style.use('seaborn-whitegrid') ====================================================================== 开始执行蔬菜销售预测分析程序 ====================================================================== [22:51:07] 步骤 1: 开始数据预处理 [22:51:07] 步骤 1.1: 读取附件1和附件2数据 [22:51:45] 步骤 1.2: 合并数据集: 单品编码=251条, 销售记录=878503条 [22:51:45] 步骤 1.3: 处理退货数据 [22:51:45] 步骤 1: 数据预处理完成, 共处理878503条记录 [22:51:45] 步骤 2: 开始创建透视表 [22:51:45] 步骤 2.1: 计算每日指标 [22:51:45] 步骤 2.2: 计算退货率 [22:51:45] 步骤 2.3: 创建透视表 [22:51:45] 步骤 2: 透视表创建完成 [22:51:45] 步骤 3: 开始保存结果 [22:51:45] 步骤 3.1: 保存 price 数据到: D:\虚拟c盘\大学\竞赛\数学建模\2023年C题\C235问题二output_price.xlsx [22:51:48] 步骤 3.1: 保存 vol 数据到: D:\虚拟c盘\大学\竞赛\数学建模\2023年C题\C235问题二output_vol.xlsx [22:51:51] 步骤 3.1: 保存 sales 数据到: D:\虚拟c盘\大学\竞赛\数学建模\2023年C题\C235问题二output_sales.xlsx [22:51:54] 步骤 3: 结果保存完成 [22:51:54] 步骤 5: 开始Prophet时间序列分析 [22:51:54] 步骤 4: 开始Prophet分析 [22:51:54] 步骤 4.1: 读取预处理数据 [22:51:56] 步骤 4.2: 验证和清洗日期数据 [22:51:56] 步骤 4.2: 移除了 0 条无效日期记录 [22:51:56] 步骤 4.2: 有效日期范围: 2020-07-01 00:00:00 到 2023-06-30 00:00:00 [22:51:56] 步骤 4.2: 开始分析 4 个品类 [22:51:56] 步骤 4.2: 分析品类 1/4: ds Traceback (most recent call last): File "pandas\_libs\tslibs\parsing.pyx", line 679, in pandas._libs.tslibs.parsing.dateutil_parse ValueError: day is out of range for month The above exception was the direct cause of the following exception: Traceback (most recent call last): File "d:/虚拟c盘/大学/竞赛/数学建模/2023年C题/C235问题二prophet.py", line 324, in <module> main() File "d:/虚拟c盘/大学/竞赛/数学建模/2023年C题/C235问题二prophet.py", line 310, in main prophet_results = prophet_analysis_with_visualization(target_category='花叶类') File "d:/虚拟c盘/大学/竞赛/数学建模/2023年C题/C235问题二prophet.py", line 155, in prophet_analysis_with_visualization df = sales_df[['ds', col]].rename(columns={col: 'y'}).query('y > 0') File "D:\lib\site-packages\pandas\core\frame.py", line 4440, in query res = self.eval(expr, **kwargs) File "D:\lib\site-packages\pandas\core\frame.py", line 4566, in eval return _eval(expr, inplace=inplace, **kwargs) File 
"D:\lib\site-packages\pandas\core\computation\eval.py", line 336, in eval parsed_expr = Expr(expr, engine=engine, parser=parser, env=env) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 809, in __init__ self.terms = self.parse() File "D:\lib\site-packages\pandas\core\computation\expr.py", line 828, in parse return self._visitor.visit(self.expr) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 415, in visit return visitor(node, **kwargs) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 421, in visit_Module return self.visit(expr, **kwargs) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 415, in visit return visitor(node, **kwargs) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 424, in visit_Expr return self.visit(node.value, **kwargs) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 415, in visit return visitor(node, **kwargs) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 719, in visit_Compare return self.visit(binop) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 415, in visit return visitor(node, **kwargs) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 537, in visit_BinOp return self._maybe_evaluate_binop(op, op_class, left, right) File "D:\lib\site-packages\pandas\core\computation\expr.py", line 504, in _maybe_evaluate_binop res = op(lhs, rhs) File "D:\lib\site-packages\pandas\core\computation\ops.py", line 380, in __init__ self.convert_values() File "D:\lib\site-packages\pandas\core\computation\ops.py", line 478, in convert_values v = Timestamp(ensure_decoded(v)) File "pandas\_libs\tslibs\timestamps.pyx", line 1667, in pandas._libs.tslibs.timestamps.Timestamp.__new__ File "pandas\_libs\tslibs\conversion.pyx", line 280, in pandas._libs.tslibs.conversion.convert_to_tsobject File "pandas\_libs\tslibs\conversion.pyx", line 557, in pandas._libs.tslibs.conversion.convert_str_to_tsobject File 
"pandas\_libs\tslibs\parsing.pyx", line 329, in pandas._libs.tslibs.parsing.parse_datetime_string File "pandas\_libs\tslibs\parsing.pyx", line 683, in pandas._libs.tslibs.parsing.dateutil_parse pandas._libs.tslibs.parsing.DateParseError: day is out of range for month: 0
08-07
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值