# [Post text, translated] "Based on the code below:"
# ===============================
# Part 2: evaluate forecast accuracy month by month
# ===============================
# NOTE(review): `pd`, `comparison_df`, `predictions`, `y_test_original` and
# `dates_test` are not defined in this section; presumably they come from
# earlier cells -- confirm. (`import pandas as pd` only appears further down.)

# The first index entry of comparison_df selects the model whose test-set
# predictions are analysed below.
best_model_name = comparison_df.index[0]
best_model_predictions = predictions[best_model_name]
pred_test = best_model_predictions  # unified name used by the later analysis
y_test = y_test_original

# One row per test sample: date, observed value, predicted value.
df_test = pd.DataFrame({
    'dates_test': dates_test,
    'y_test': y_test,
    'pred_test': pred_test,
})

# Calendar month and year of every sample, used as grouping keys.
df_test['month'] = df_test['dates_test'].dt.month
df_test['year'] = df_test['dates_test'].dt.year

# One result dict per (month, year) combination found in the data.
evaluation = []

# Outer loop over months, inner loop over years, so the rows are appended
# in (month, year) order.
months = sorted(df_test['month'].unique())
for month in months:
    month_data = df_test[df_test['month'] == month]
    for year in sorted(month_data['year'].unique()):
        # `year` comes from the values of month_data, so this subset is
        # never empty and needs no emptiness guard.
        specific_month_data = month_data[month_data['year'] == year]
        total_days = len(specific_month_data)

        # A prediction counts as accurate when the observed value lies
        # within +/-25 % of the predicted value.
        lower_bound = specific_month_data['pred_test'] * 0.75
        upper_bound = specific_month_data['pred_test'] * 1.25
        actual_value = specific_month_data['y_test']

        # Vectorised replacement of the former per-row loop. The masks are
        # made mutually exclusive in the same priority order as the old
        # if/elif chain: accurate, then overestimate, then underestimate.
        within_band = (lower_bound <= actual_value) & (actual_value <= upper_bound)
        below_band = ~within_band & (actual_value < lower_bound)
        above_band = ~within_band & ~below_band & (actual_value > upper_bound)

        accurate_count = int(within_band.sum())
        # Observed below the band means the forecast was too high.
        overestimate_count = int(below_band.sum())
        # Observed above the band means the forecast was too low.
        underestimate_count = int(above_band.sum())

        evaluation.append({
            'Year': year,
            'Month': month,
            'Accuracy Rate': accurate_count / total_days,
            'Overestimate Rate': overestimate_count / total_days,
            'Underestimate Rate': underestimate_count / total_days,
        })

# Collect the per-month results into a DataFrame and print them.
evaluation_df = pd.DataFrame(evaluation)
print("\n📊 Monthly Evaluation Results:")
print(evaluation_df)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Use Times New Roman for all text in the figure.
plt.rcParams['font.family'] = 'Times New Roman'

# evaluation_df is expected to exist already, with the columns 'Year',
# 'Month', 'Accuracy Rate', 'Overestimate Rate' and 'Underestimate Rate'.
evaluation_df['Year'] = evaluation_df['Year'].astype(int)
evaluation_df['Month'] = evaluation_df['Month'].astype(int)

# Build a zero-padded "YYYY-MM" key and sort the rows by it.
evaluation_df['YearMonth'] = (
    evaluation_df['Year'].astype(str)
    + '-'
    + evaluation_df['Month'].astype(str).str.zfill(2)
)
evaluation_df = evaluation_df.sort_values(by='YearMonth').reset_index(drop=True)

# -----------------------------
# Plot set-up
# -----------------------------
plt.figure(figsize=(18, 7))  # slightly wider so the bar groups look less cramped
ind = np.arange(len(evaluation_df))  # one x position per year-month row
width = 0.3  # narrow bars leave more white space between groups

# Grouped bars, left to right: Accuracy, Underestimate, Overestimate.
accuracy_bar = plt.bar(ind - width, evaluation_df['Accuracy Rate'], width,
                       color='green', label='Accuracy Rate')
underestimate_bar = plt.bar(ind, evaluation_df['Underestimate Rate'], width,
                            color='red', label='Underestimate Rate')
overestimate_bar = plt.bar(ind + width, evaluation_df['Overestimate Rate'], width,
                           color='blue', label='Overestimate Rate')

# Write each bar's value as a percentage just above the bar.
for bars in [accuracy_bar, underestimate_bar, overestimate_bar]:
    for bar in bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width() / 2.0, height, f'{height:.1%}',
                 ha='center', va='bottom', fontsize=9)

# Line plot of the accuracy rate only, using the same x positions as the
# accuracy bars.
plt.plot(ind - width, evaluation_df['Accuracy Rate'], marker='o', linestyle='-',
         color='green', label='Accuracy Line', linewidth=2, markersize=5)

# Title and axis labels.
plt.title('AQI Range Forecast Accuracy Evaluation', fontsize=16)
plt.xlabel('Month', fontsize=12)
plt.ylabel('Rate', fontsize=12)

# Rotated year-month tick labels.
plt.xticks(ind, evaluation_df['YearMonth'], rotation=45)

# Put the legend outside the axes on the right so it does not cover the bars.
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)

# Adjust the layout automatically so nothing is clipped.
plt.tight_layout()

# Show the figure.
plt.show()

# [Post text, translated] "The code below produces the error shown at the
# end of this post."
# Define the AQI classification function (returns English labels,
# consistent with `labels`).
def classify_aqi(aqi):
    """Return the English AQI category label for a numeric AQI value.

    Each category has an inclusive upper bound (50, 100, 150, 200, 300).
    Any value that is not ``<=`` one of those bounds gets the last label,
    'Severe pollution'.
    """
    upper_bounds = (
        (50, 'Excellent'),
        (100, 'Good'),
        (150, 'Light pollution'),
        (200, 'Moderate pollution'),
        (300, 'Heavy pollution'),
    )
    # Bounds are checked in ascending order; the first one that holds wins.
    for upper_bound, label in upper_bounds:
        if aqi <= upper_bound:
            return label
    return 'Severe pollution'
# Classify both the observed and the predicted values into AQI categories,
# storing each result in its own column of df_test.
for value_column, category_column in (
    ('y_test', 'y_test_category'),
    ('pred_test', 'pred_test_category'),
):
    df_test[category_column] = df_test[value_column].apply(classify_aqi)
# Ordinal level of every AQI category label (1 = 'Excellent' up to
# 6 = 'Severe pollution'), keyed by the English label.
category_levels = {
    label: level
    for level, label in enumerate(
        (
            'Excellent',
            'Good',
            'Light pollution',
            'Moderate pollution',
            'Heavy pollution',
            'Severe pollution',
        ),
        start=1,
    )
}
# Compare category levels to decide whether a prediction is accurate,
# an overestimate or an underestimate (returns an English status string).
def assess_prediction(row):
    """Return the level-based assessment for one row.

    ``row`` must provide 'y_test_category' and 'pred_test_category', both
    of which are looked up in ``category_levels``.

    Returns 'Accurate level' when the levels match, 'Overestimate level'
    when the predicted level is exactly one above the actual level,
    'Underestimate level' when it is exactly one below, and
    'Significant Deviation' for any larger gap.
    """
    actual_rank = category_levels[row['y_test_category']]
    predicted_rank = category_levels[row['pred_test_category']]
    verdict_by_gap = {
        0: 'Accurate level',
        1: 'Overestimate level',
        -1: 'Underestimate level',
    }
    # Any gap larger than one level in either direction is a significant deviation.
    return verdict_by_gap.get(predicted_rank - actual_rank, 'Significant Deviation')
# Assess every daily prediction.
# The category columns ('y_test_category', 'pred_test_category') are added to
# df_test, so the assessment must run on df_test. Running it on `df` is what
# raised KeyError: 'y_test_category'.
df_test['assessment'] = df_test.apply(assess_prediction, axis=1)

# Count the assessment outcomes per calendar month. df_test stores its dates
# in the 'dates_test' column.
monthly_stats = (
    df_test.groupby(df_test['dates_test'].dt.to_period('M'))['assessment']
    .value_counts()
    .unstack()
    .fillna(0)
    # Guarantee a column for every possible status, so the rate columns below
    # do not raise KeyError when a status never occurs in the data.
    .reindex(
        columns=[
            'Accurate level',
            'Overestimate level',
            'Significant Deviation',
            'Underestimate level',
        ],
        fill_value=0,
    )
)

# Accuracy, overestimate and underestimate rates per month. 'total' counts
# all four statuses, including 'Significant Deviation'.
monthly_stats['total'] = monthly_stats.sum(axis=1)
monthly_stats['Level Accuracy Rate'] = monthly_stats['Accurate level'] / monthly_stats['total']
monthly_stats['Level Overestimate Rate'] = monthly_stats['Overestimate level'] / monthly_stats['total']
monthly_stats['Level Underestimate Rate'] = monthly_stats['Underestimate level'] / monthly_stats['total']

# [Post text, translated] Running the original version of this cell (which
# used `df` instead of `df_test`) produced the error reported below:
# KeyError                                  Traceback (most recent call last)
File D:\anaconda3\Lib\site-packages\pandas\core\indexes\base.py:3805, in Index.get_loc(self, key)
3804 try:
-> 3805 return self._engine.get_loc(casted_key)
3806 except KeyError as err:
File index.pyx:167, in pandas._libs.index.IndexEngine.get_loc()
File index.pyx:196, in pandas._libs.index.IndexEngine.get_loc()
File pandas\\_libs\\hashtable_class_helper.pxi:7081, in pandas._libs.hashtable.PyObjectHashTable.get_item()
File pandas\\_libs\\hashtable_class_helper.pxi:7089, in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'y_test_category'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
Cell In[11], line 44
41 return 'Significant Deviation' # 相差一个以上级别
43 # 应用函数判断每天的预测
---> 44 df['assessment'] = df.apply(assess_prediction, axis=1)
46 # 按月汇总
47 monthly_stats = df.groupby(df['date'].dt.to_period('M')).assessment.value_counts().unstack().fillna(0)
File D:\anaconda3\Lib\site-packages\pandas\core\frame.py:10374, in DataFrame.apply(self, func, axis, raw, result_type, args, by_row, engine, engine_kwargs, **kwargs)
10360 from pandas.core.apply import frame_apply
10362 op = frame_apply(
10363 self,
10364 func=func,
(...)
10372 kwargs=kwargs,
10373 )
> 10374 return op.apply().__finalize__(self, method="apply")
File D:\anaconda3\Lib\site-packages\pandas\core\apply.py:916, in FrameApply.apply(self)
913 elif self.raw:
914 return self.apply_raw(engine=self.engine, engine_kwargs=self.engine_kwargs)
--> 916 return self.apply_standard()
File D:\anaconda3\Lib\site-packages\pandas\core\apply.py:1063, in FrameApply.apply_standard(self)
1061 def apply_standard(self):
1062 if self.engine == "python":
-> 1063 results, res_index = self.apply_series_generator()
1064 else:
1065 results, res_index = self.apply_series_numba()
File D:\anaconda3\Lib\site-packages\pandas\core\apply.py:1081, in FrameApply.apply_series_generator(self)
1078 with option_context("mode.chained_assignment", None):
1079 for i, v in enumerate(series_gen):
1080 # ignore SettingWithCopy here in case the user mutates
-> 1081 results[i] = self.func(v, *self.args, **self.kwargs)
1082 if isinstance(results[i], ABCSeries):
1083 # If we have a view on v, we need to make a copy because
1084 # series_generator will swap out the underlying data
1085 results[i] = results[i].copy(deep=False)
Cell In[11], line 32, in assess_prediction(row)
31 def assess_prediction(row):
---> 32 actual_level = category_levels[row['y_test_category']]
33 predicted_level = category_levels[row['pred_test_category']]
34 if actual_level == predicted_level:
File D:\anaconda3\Lib\site-packages\pandas\core\series.py:1121, in Series.__getitem__(self, key)
1118 return self._values[key]
1120 elif key_is_scalar:
-> 1121 return self._get_value(key)
1123 # Convert generator to list before going through hashable part
1124 # (We will iterate through the generator there to check for slices)
1125 if is_iterator(key):
File D:\anaconda3\Lib\site-packages\pandas\core\series.py:1237, in Series._get_value(self, label, takeable)
1234 return self._values[label]
1236 # Similar to Index.get_value, but we do not fall back to positional
-> 1237 loc = self.index.get_loc(label)
1239 if is_integer(loc):
1240 return self._values[loc]
File D:\anaconda3\Lib\site-packages\pandas\core\indexes\base.py:3812, in Index.get_loc(self, key)
3807 if isinstance(casted_key, slice) or (
3808 isinstance(casted_key, abc.Iterable)
3809 and any(isinstance(x, slice) for x in casted_key)
3810 ):
3811 raise InvalidIndexError(key)
-> 3812 raise KeyError(key) from err
3813 except TypeError:
3814 # If we have a listlike key, _check_indexing_error will raise
3815 # InvalidIndexError. Otherwise we fall through and re-raise
3816 # the TypeError.
3817 self._check_indexing_error(key)
KeyError: 'y_test_category'
问题
最新发布