Random_Logistic_regression中get_support()方法

博客讲述使用随机逻辑回归模型进行特征筛选,调用get_support方法获取结果时出现索引错误。指出get_support的indices参数默认是False,返回布尔数组,将其改为True,返回整型数组,即可解决该错误。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

使用随机逻辑回归模型进行特征筛选,使用get_support方法获取结果时出现以下错误:

IndexError: boolean index did not match indexed array along dimension 0; dimension is 9 but corresponding boolean dimension is 8

解决办法:

get_support(indices=False) 中,参数 indices 默认为 False,此时返回一个布尔(boolean)类型的掩码数组,长度等于参与特征选择的特征数;indices 为 True 时,返回由被选中特征的位置索引组成的整型数组。

所以解决办法就是修改这个参数:把默认的 get_support() 改为 get_support(indices=True) 即可。原因是布尔掩码的长度(8)必须与被索引数组的长度(9)完全一致,否则就会抛出上面的 IndexError;而整型索引数组只按位置取元素,不要求长度一致。
 

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import LogisticRegression
from statsmodels.stats.outliers_influence import variance_inflation_factor

# NOTE(review): X, y, model, selector and feature_columns are not defined in
# this snippet; presumably they come from an earlier part of the analysis
# (X a DataFrame whose columns equal feature_columns) -- confirm before use.

# 1. Compute the feature correlation matrix and plot it as a heatmap.
corr_matrix = X.corr()
plt.figure(figsize=(15, 12))
sns.heatmap(
    corr_matrix,
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    annot_kws={"size": 8},
    cbar_kws={"shrink": 0.8},
)
plt.title("特征相关性热力图")
plt.show()

# 2. Compute the VIF (variance inflation factor) to detect multicollinearity.
vif_data = pd.DataFrame()
vif_data["Feature"] = feature_columns
vif_data["VIF"] = [
    variance_inflation_factor(X.values, i) for i in range(len(feature_columns))
]
print("多重共线性诊断 (VIF > 5 表示高共线性):")
print(vif_data.sort_values("VIF", ascending=False))

# 3. Build a single table comparing both importance measures.
# Univariate importance: F-test scores scaled by their maximum.
f_scores, _ = f_classif(X, y)
f_importance = f_scores / f_scores.max()

# Multivariate importance: absolute model coefficients scaled by their maximum.
coef_importance = np.abs(model.coef_[0])
coef_importance = coef_importance / coef_importance.max()

importance_df = pd.DataFrame({
    "Feature": feature_columns,
    "F_Importance": f_importance,
    "Coef_Importance": np.nan,  # stays NaN for features the selector dropped
})

# Fill in coefficients only for the selected features. The k-th selected index
# corresponds to the k-th coefficient, so enumerate gives the position directly
# (the original looked it up with list.index on every iteration).
selected_indices = selector.get_support(indices=True)
for position, idx in enumerate(selected_indices):
    importance_df.at[idx, "Coef_Importance"] = coef_importance[position]

# Disagreement between the two measures (NaN for unselected features).
importance_df["Importance_Diff"] = np.abs(
    importance_df["F_Importance"] - importance_df["Coef_Importance"]
)

print("\n统一特征重要性对比:")
print(importance_df.sort_values("Importance_Diff", ascending=False))

# 4. Visual comparison of the two importance measures.
plt.figure(figsize=(14, 8))
plt.scatter(importance_df["F_Importance"], importance_df["Coef_Importance"], s=100)

# Label every point; unselected features have no coefficient, so their label
# is drawn near the x-axis in red.
for _, row in importance_df.iterrows():
    if not np.isnan(row["Coef_Importance"]):
        plt.text(row["F_Importance"] + 0.02, row["Coef_Importance"] + 0.02,
                 row["Feature"], fontsize=9)
    else:
        plt.text(row["F_Importance"] + 0.02, 0.02,
                 f"{row['Feature']} (未选择)", fontsize=9, color="red")

# Reference line y = x: points on it are rated equally by both measures.
plt.axline((0, 0), slope=1, color="red", linestyle="--", alpha=0.5)
plt.xlabel("单变量重要性 (F检验)")
plt.ylabel("多变量重要性 (模型系数绝对值)")
plt.title("单变量与多变量特征重要性对比")
plt.grid(True, alpha=0.3)
plt.show()

# 5. Adjust the model using domain knowledge.
# Example: always keep clinically important features.
clinical_features = ["年龄", "GCS", "意识情况"]


class ClinicalFeatureSelector(BaseEstimator, TransformerMixin):
    """Select features while always keeping a fixed set of clinical columns.

    The columns named in ``clinical_features`` (those present in the fitted
    DataFrame) are always kept. The remaining slots, up to ``k`` features in
    total, are filled by ``SelectKBest(f_classif)`` run on the other columns.

    Parameters:
        clinical_features: column names that must always be kept.
        k: total number of features to keep, clinical ones included.
    """

    def __init__(self, clinical_features, k=10):
        self.clinical_features = clinical_features
        self.k = k
        self.selector = None

    def fit(self, X, y=None):
        """Fit the inner SelectKBest on the non-clinical columns of ``X``.

        ``X`` must be a DataFrame (its ``columns`` are read). Returns ``self``.
        """
        forced = set(self.clinical_features)
        # Remember the column layout seen at fit time so that transform() and
        # get_support() agree with it instead of relying on globals.
        self.columns_ = list(X.columns)
        self.clinical_columns_ = [
            f for f in self.clinical_features if f in X.columns
        ]
        self.other_columns_ = [c for c in self.columns_ if c not in forced]

        # Clamp the remaining budget: it must not be negative and cannot
        # exceed the number of columns left to choose from.
        remaining = self.k - len(self.clinical_columns_)
        remaining = min(max(remaining, 0), len(self.other_columns_))

        if self.other_columns_:
            self.selector = SelectKBest(f_classif, k=remaining)
            self.selector.fit(X[self.other_columns_], y)
        else:
            # Nothing besides the clinical columns: no inner selector needed.
            self.selector = None
        return self

    def transform(self, X):
        """Return the kept columns as an array.

        Column order is: clinical columns first (in ``clinical_features``
        order), then the columns chosen by the inner selector. This differs
        from the original-column order used by ``get_support``.
        """
        clinical_data = X[self.clinical_columns_].values
        if self.selector is None:
            return clinical_data
        other_data = self.selector.transform(X[self.other_columns_])
        return np.hstack([clinical_data, other_data])

    def get_support(self, indices=False):
        """Return which of the fitted columns are kept.

        Parameters:
            indices: if False (default) return a boolean mask with one entry
                per fitted column; if True return the integer positions of
                the kept columns.
        """
        position_of = {name: i for i, name in enumerate(self.columns_)}
        mask = np.zeros(len(self.columns_), dtype=bool)
        for name in self.clinical_columns_:
            mask[position_of[name]] = True
        if self.selector is not None:
            # The inner mask is aligned with other_columns_, not with the full
            # column list, so map each entry back to its original position.
            inner_mask = self.selector.get_support()
            for name, keep in zip(self.other_columns_, inner_mask):
                if keep:
                    mask[position_of[name]] = True
        return np.flatnonzero(mask) if indices else mask


# Use the new feature selector.
clinical_selector = ClinicalFeatureSelector(clinical_features=clinical_features, k=10)
X_clinical = clinical_selector.fit_transform(X, y)

# Retrain the model on the clinically adjusted feature set.
# NOTE(review): the original passed y_res here, but X_clinical is derived from
# X, which is paired with y everywhere else in this script. If training on
# resampled data is intended, transform the resampled X and pass y_res with it.
model_clinical = LogisticRegression(max_iter=1000, random_state=42)
model_clinical.fit(X_clinical, y)

# Compare the two feature selections.
print("\n原始特征选择 vs 临床调整特征选择:")
print("原始选择:", [feature_columns[i] for i in selector.get_support(indices=True)])
print("临床调整:", [feature_columns[i] for i in clinical_selector.get_support(indices=True)])

# Original poster's note: the editor reports many names as unreferenced
# (undefined); please fix.
最新发布
07-23
评论 11
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值