python实现RF.feature_importances的条形图

本文介绍了一种使用Python的matplotlib和pandas库来可视化特征重要性的方法。通过将特征重要性数据整理并绘制为水平条形图,清晰地展示了各特征在模型中的相对重要性。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

# -*- coding: utf-8 -*-
"""Plot RandomForest feature_importances_ as a horizontal bar chart."""
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd

# Raw importances, e.g. copied from a fitted RandomForest's feature_importances_.
Feature_importances = [0.09874236361414918, 0.05457733080394871, 0.010137636755458375, 0.002168849354716167,
                       0.001013334072272919, 0.0004140993059956171, 8.349594684160916e-05, 3.20916475647705e-05,
                       1.013794641507518e-06, 0.0, 0.09850544433863488, 0.09200726418964804, 0.08526823770386598,
                       0.0929247547648456, 0.08577678907643776, 0.07479688092774066, 0.08069145257465207,
                       0.10305018928137757, 0.11980877184720869, 0.0]

# One label per importance value, in the same order.
fea_label = ['fac_1', 'fac_2', 'fac_3', 'fac_4', 'fac_5', 'fac_6', 'fac_7', 'fac_8', 'fac_9', 'fac_10',
             'abe_1', 'abe_2', 'abe_3', 'abe_4', 'abe_5', 'abe_6', 'abe_7', 'abe_8', 'abe_9', 'abe_10']

# Round for readable tick labels, then sort ascending so the largest bar is
# drawn last and therefore appears at the top of the horizontal chart.
Feature_importances = [round(x, 4) for x in Feature_importances]
F2 = pd.Series(Feature_importances, index=fea_label).sort_values(ascending=True)
f_index = F2.index
f_values = F2.values

# Print the sorted labels and values for reference.
print('f_index:', f_index)
print('f_values:', f_values)
#####################################
# Bar positions derived from the number of features, instead of the original
# hard-coded 20-point 0.0..0.95 grid with a magic height of 0.028.  The old
# `x/20` form breaks for any feature count other than 20 and collapses every
# position to 0 under Python 2 integer division.
y_pos = range(len(f_values))
plt.rcParams['figure.figsize'] = (10, 10)
plt.barh(y_pos, f_values, align="center", color='tan', tick_label=f_index)
plt.xlabel('importances')
plt.ylabel('features')
plt.show()

 

# -*- Output: sorted index and importances -*-

# Plotting section:

 

# Reformatted from a one-line paste: impute, train a RandomForestRegressor,
# and plot its feature importances as a horizontal bar chart.
import pandas as pd
import numpy as np
from sklearn.experimental import enable_iterative_imputer  # noqa: F401 (enables IterativeImputer)
from sklearn.impute import IterativeImputer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Load data (NOTE(review): absolute local path — adjust to your environment).
data = pd.read_excel("C:\\Users\\DELL\\Desktop\\神经外科-sheet2_imputed.xlsx", sheet_name='Sheet1')

# Separate features and target column.
X = data.drop('病情变化', axis=1)
y = data['病情变化']

# Impute missing feature values with the iterative (MICE-style) imputer,
# then restore the column names.
imputer = IterativeImputer(max_iter=10, random_state=42)
X_imputed = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

# Train / test split.
X_train, X_test, y_train, y_test = train_test_split(
    X_imputed, y, test_size=0.2, random_state=42
)

# Standardize features; fit on the training split only to avoid leakage.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the model.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train_scaled, y_train)

# Evaluate on the held-out split.
y_pred = rf.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.4f}")

# Feature-importance chart, most important feature at the top.
importances = rf.feature_importances_
sorted_idx = importances.argsort()[::-1]
plt.figure(figsize=(12, 8))
plt.barh(range(len(sorted_idx)), importances[sorted_idx], align='center')
plt.yticks(range(len(sorted_idx)), X.columns[sorted_idx])
# NOTE(review): Chinese labels require a CJK font, e.g.
# plt.rcParams['font.sans-serif'] = ['SimHei'] — confirm on your system.
plt.title("特征重要性")
plt.xlabel("相对重要性")
plt.gca().invert_yaxis()
plt.tight_layout()
plt.show()
# Follow-up request from the thread: also display the numeric value on each bar.
07-23
# Reformatted from a one-line paste: same pipeline as above, with CJK font
# configuration and a numeric value label on each importance bar.
import pandas as pd
import numpy as np
from sklearn.experimental import enable_iterative_imputer  # noqa: F401 (enables IterativeImputer)
from sklearn.impute import IterativeImputer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Render Chinese text and minus signs correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Load data (NOTE(review): absolute local path — adjust to your environment).
data = pd.read_excel("C:\\Users\\DELL\\Desktop\\神经外科-sheet2_imputed.xlsx", sheet_name='Sheet2')

# Separate features and target column.
X = data.drop('病情变化', axis=1)
y = data['病情变化']

# Impute missing feature values, then restore the column names.
imputer = IterativeImputer(max_iter=10, random_state=42)
X_imputed = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

# Train / test split.
X_train, X_test, y_train, y_test = train_test_split(
    X_imputed, y, test_size=0.2, random_state=42
)

# Standardize features; fit on the training split only to avoid leakage.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train and evaluate.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train_scaled, y_train)
y_pred = rf.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.4f}")

# Sort features by importance, largest first.
importances = rf.feature_importances_
sorted_idx = importances.argsort()[::-1]
sorted_importances = importances[sorted_idx]
sorted_features = X.columns[sorted_idx]

plt.figure(figsize=(12, 8))
bars = plt.barh(range(len(sorted_idx)), sorted_importances, align='center', color='skyblue')

# Annotate each bar with its value (4 decimals), just right of the bar end.
for i, bar in enumerate(bars):
    x_pos = bar.get_width() + 0.001          # small offset past the bar
    y_pos = bar.get_y() + bar.get_height() / 2
    plt.text(x_pos, y_pos, f'{sorted_importances[i]:.4f}',
             va='center', ha='left', fontsize=9)

plt.title("特征重要性分析", fontsize=14, fontweight='bold')
plt.xlabel("相对重要性", fontsize=12)
plt.yticks(range(len(sorted_idx)), sorted_features, fontsize=10)
plt.grid(axis='x', linestyle='--', alpha=0.7)  # x-gridlines for readability
plt.gca().invert_yaxis()                       # most important feature on top
plt.tight_layout()
plt.show()
# Follow-up request from the thread: print the importance ranking as text.
最新发布
07-23
# Reformatted and repaired from a one-line paste that the author reported as
# not runnable.  Concrete fixes:
#  * pd.ExcelFile(path, 'Sheet=Sheet1') is not a valid call — replaced with
#    pd.read_excel(path, sheet_name='Sheet1'), which returns a DataFrame.
#  * fillna() was called on the ExcelFile object, not on a DataFrame.
#  * StandardScaler was fitted on the whole table (target column included,
#    test rows included) — now only the features are scaled, fitted on the
#    training split, and the target is left in its original units.
#  * Removed the unused DecisionTreeClassifier import.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Load data (replace with your actual path / sheet name).
data = pd.read_excel("C:\\Users\\DELL\\Desktop\\神经外科-sheet2_imputed.xlsx", sheet_name='Sheet1')

# Fill missing values with column means.
data.fillna(data.mean(), inplace=True)

# '病情变化' is the target; every other column is a feature.
X = data.drop('病情变化', axis=1)
y = data['病情变化']

# Train / test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features only; fit on the training split to avoid leakage.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the random-forest model.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train_scaled, y_train)

# Predict and evaluate.
y_pred = rf.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error: ", mse)

# Rank features by importance, largest first.
importances = rf.feature_importances_
indices = np.argsort(importances)[::-1]
print("Feature ranking:")
for f in range(X.shape[1]):
    print(f"{f + 1}. Feature {indices[f]} - Importance: {importances[indices[f]]}")

# Plot the ranked importances.
plt.figure(figsize=(10, 6))
plt.title("Feature Importance")
plt.bar(range(X.shape[1]), importances[indices], align="center")
plt.xticks(range(X.shape[1]), indices)
plt.xlim([-1, X.shape[1]])
plt.show()
07-23
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值