Usage of !important

This article introduces the role of the !important declaration in CSS, including how it raises style priority in IE6 and later browsers, and how it can forcibly redefine a style instead of following the styles imposed through a parent ID rule. It also discusses !important's compatibility issues across browser versions.

First, look at how the W3C reference material describes it:

Raises the application priority of the specified style rule.
IE6 and earlier have a well-known support problem: !important does not override a later declaration of the same property within the same rule.

IE6 does not properly honor !important: when the same property is declared again later in the same rule, the later declaration still wins, so the declaration carrying !important is effectively ignored. IE7 and later support it correctly.
This gives rise to a useful technique:
provide two different values of the same property, one for IE6 and one for IE7 and later. The role of !important here is, among these duplicate declarations, to raise the priority of the value intended for the newer browsers.

As is well known, CSS applies declarations of the same property from top to bottom, so a later declaration overrides an earlier one.
Example:

p {
    color: blue;  /* overridden by the next declaration */
    color: red;   /* red is displayed */
}

However, once !important is added, the situation changes (the difference shows up only between IE6 and later versions).

p {
    color: blue !important;  /* IE7+ displays blue */
    color: red;              /* IE6 still displays red */
}
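
A classic real-world application of this trick is the IE6 min-height hack: IE6 ignores min-height but treats height as if it were a minimum, while modern browsers honor the !important declaration (a sketch; the selector and values here are illustrative):

#container {
    min-height: 200px;        /* modern browsers: the real minimum height */
    height: auto !important;  /* modern browsers: beats the next line, so content can grow */
    height: 200px;            /* IE6: wins despite the !important above, acting like min-height */
}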

In addition, because ID selectors have higher specificity than class selectors, another use of !important is to forcibly redefine a style: start from a clean slate and apply your own styling instead of the styling imposed by a rule on a parent ID.
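
For instance, suppose a paragraph is styled by a rule keyed to a parent ID; a plain class rule would lose on specificity, but !important lets it win (a minimal sketch; the id, class name, and colors are illustrative):

/* assumed markup: <div id="sidebar"><p class="highlight">...</p></div> */

#sidebar p {
    color: gray;               /* higher specificity: would normally win */
}

.highlight {
    color: orange !important;  /* !important beats the higher-specificity ID rule */
}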

Summary:

  1. Raise priority: treat IE6 and IE7+ browsers differently.
  2. Force redefinition: no need to follow the original styles from a parent ID rule.