我要参加中国移动梧桐杯数据赛道的比赛。下面是我目前在比赛系统中得分最高的代码,请你在这份代码的基础上进行调整,帮助我在比赛中获得更高的分数。请直接在下面的代码上修改,改好之后发给我。注意!要确保你给我的代码可以顺利运行,并告知我运行完大概需要多长时间。还要保证我提交你的代码之后,在比赛系统中得到的分数比我目前的最好成绩更高!
我的代码如下:
import pandas as pd
from pandas import Series
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix,classification_report
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
import lightgbm as lgb
# import catboost as cb
import xgboost as xgb
from sklearn.linear_model import BayesianRidge
from scipy.stats import randint, uniform
import warnings
# Notebook-style configuration for the rest of the script.
pd.set_option('display.max_columns', None) # None means: show every column
# NOTE(review): the bare string below is a no-op expression (it is not a
# docstring and nothing in this script reads it). Its text describes an IDE
# "VM options" field and an -Xmx2g memory cap - TODO confirm it is still wanted.
'''
在「Configuration」标签页的「Environment variables」下方,找到「VM options」(若没有则手动添加),
输入-Xmx2g(表示限制最大使用内存为 2GB,可根据电脑内存调整,如 4GB 内存的电脑建议设为 1-2GB)。
'''
# Silence every warning category from here on.
warnings.filterwarnings('ignore')
# Load the training set, then re-append a hard-coded list of rows (selected by
# position with iloc), so every listed row is present twice afterwards.
df_train= pd.read_csv( '/home/jovyan/output/dataA/train.csv')
append_list=[16936, 24227, 10469, 3867, 32621, 25670, 704, 8715, 36938, 9599, 26901, 5466, 30038, 27722, 20097, 44076, 44510, 25927, 43500, 25292, 20980, 36553, 31715, 48212, 13993, 45508, 45573, 13915, 17362, 17902, 3870, 18499, 38359, 21417, 14355, 28542, 10915, 11047, 6392, 29410, 23132, 32978, 5528, 4123, 43106, 28447, 46005, 6201, 8425, 26277, 43955, 47491, 6683, 32659, 15943, 8895, 19256, 38659, 195, 34322, 37899, 40851, 12141, 46259, 23288, 42114, 35746, 34509, 18905, 9410, 38030, 28636, 37029, 47061, 31024, 29217, 18273, 31329, 26872, 26247, 26156, 40498, 40571, 20331, 34265, 34441, 33867, 28519, 43126, 28026, 13541, 10904, 11650, 31719, 44779, 20918, 34806, 2329, 11595, 30801]
df_train=pd.concat([df_train,df_train.iloc[append_list]])
# Outlier detection with DBSCAN on every column except the ID/date columns
# dropped here.
# NOTE(review): nothing removes is_positive before scaling, so the label takes
# part in the clustering - confirm that this is intended.
df_dbscan=df_train.drop(['user_id','registration_date','residence_base_station_id'
,'residence_cell_id','tariff_id','registration_channel_id'],axis=1)
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
# Standardise the clustering inputs
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df_dbscan)
# DBSCAN parameters; the trailing notes record the hold-out scores printed at
# the end of the script for each setting that was tried.
dbscan = DBSCAN(eps=4.1, min_samples=3)#final_accuracy: 0.9604713689148218 final_f1: 0.9604110347555872 final_score: 0.9604532686670514
# dbscan = DBSCAN(eps=4, min_samples=3)#final_accuracy: 0.9602194787379973 final_f1: 0.9601591212171151 final_score: 0.9602013714817326
# dbscan = DBSCAN(eps=4.1, min_samples=4)#final_accuracy: 0.9588813908269472 final_f1: 0.9588206772705757 final_score: 0.9588631767600357
df_dbscan['labels'] = dbscan.fit_predict(df_scaled)
# Count the outliers (DBSCAN labels them -1)
outliers_count = (df_dbscan['labels'] == -1).sum()
print('离群值样本个数:', outliers_count)
# The outlier rows are NOT removed: they are selected and concatenated back
# onto df_train, so each of them appears one more time. The index is then
# renumbered 0..n-1.
df_train_improtant = df_train[df_dbscan['labels'] == -1]
df_train=pd.concat([df_train,df_train_improtant]).reset_index(drop=True)
# exit()
# Tag the origin of each row so the combined frame can be split again later.
df_train['data']='train'
df_testA = pd.read_csv('/home/jovyan/output/dataA/testA.csv')
# df_testA.to_csv('testA1.csv')
# exit()
df_testA['data']='test'
# df_train_1=df_train.drop(["is_positive"],axis=0,inplace=True)
# join="inner" keeps only the columns that exist in both frames; presumably
# that is what removes is_positive here (testA.csv is not shown - verify).
df = pd.concat([df_train,df_testA],join="inner")
# df.drop(["Unnamed: 0"],axis=1,inplace=True)  # drop the index column
# 1. Missing values: none were found
# print(df.info())
# 2. user_id uniqueness check.
# NOTE(review): despite its name, has_duplicates is True when every user_id is
# distinct. The rows re-appended above repeat their user_id, so this prints
# False for the augmented frame.
has_duplicates = df['user_id'].nunique() == len(df)
print(has_duplicates)
# Column inventory: plain numeric columns, categorical columns, and columns
# that need bespoke handling. In this script only the printed total is used.
numeric_cols = ['age', 'over_limit_data(MB)', 'call_duration(minutes)', 'monthly_call_count',
                'monthly_weekend_call_count', 'avg_call_duration(minutes)', 'avg_weekday_call_duration(minutes)',
                'avg_weekend_call_duration(minutes)', 'residence_duration_9to11', 'residence_duration_11to14',
                'residence_duration_14to17', 'residence_duration_17to21', 'residence_duration_21to23',
                'residence_duration_24to6', 'total_residence_duration']
category_cols = ['user_id', 'registration_date', 'gender', 'uses_education_app',
                 'uses_entertainment_app', 'uses_shopping_app']
# Each entry is a real column name; the handling note that used to be glued
# onto the name by adjacent-string concatenation now lives in the comment.
special_cols = [
    'registration_channel_id',    # split as 3-2-3 characters
    'residence_base_station_id',  # split as 2 characters + remainder
    'residence_cell_id',          # split as 3-3 characters + remainder
    'tariff_id',                  # split as 4-4-4 characters
    'tariff_price(RMB)',          # numeric, to be grouped into a few bins
    'total_data(MB)',             # numeric, to be grouped into a few bins
    'total_voice(minutes)',       # numeric, to be grouped into a few bins
]
print(len(numeric_cols)+len(category_cols)+len(special_cols))
# 3. Outlier handling
# 5. Feature engineering
def preprocess(df):
    """Derive the model feature table from the combined train/test frame.

    Works on a copy of ``df``. It adds calendar fields parsed from
    ``registration_date``, call/data usage ratios and flags, app-usage
    combination flags, per-period residence statistics, tariff value ratios
    and fixed-width segments of the four ID columns; one-hot encodes the
    categorical fields with ``pd.get_dummies``; and finally drops the raw date
    and ID columns.

    Args:
        df: DataFrame holding the raw columns read below (registration date,
            the four ``*_id`` columns, tariff price/data/voice, usage columns,
            the three ``uses_*_app`` flags and the ``residence_duration_*``
            columns plus ``total_residence_duration``).

    Returns:
        A new DataFrame with the engineered and one-hot encoded columns; the
        argument itself is not modified.
    """
    df = df.copy()

    # 1. Registration date: normalise the separator and parse it once.
    df['registration_date'] = df['registration_date'].str.replace('/', '-')
    registered = pd.to_datetime(df['registration_date'])
    df['year'] = registered.dt.year
    df['month'] = registered.dt.month
    df['day'] = registered.dt.day

    # 2. Quarter-of-year indicators.
    df['season1'] = df['month'].between(1, 3).astype(int)
    # NOTE(review): the source text was truncated from "season2 ... 1 if 3"
    # up to the "> quantile(0.9)" comparison further down. season2-season4,
    # the ID segment columns and the left-hand side of long_call_flag are
    # reconstructed from the remaining hints - verify against the original.
    df['season2'] = df['month'].between(4, 6).astype(int)
    df['season3'] = df['month'].between(7, 9).astype(int)
    df['season4'] = df['month'].between(10, 12).astype(int)

    # ID segments consumed by the one-hot list below. The widths follow the
    # split pattern noted for each ID column elsewhere in this script
    # (3-2-3, 2+rest, 3-3+rest, 4-4-4) - presumed, TODO confirm.
    channel_id = df['registration_channel_id'].astype(str)
    station_id = df['residence_base_station_id'].astype(str)
    cell_id = df['residence_cell_id'].astype(str)
    tariff_id = df['tariff_id'].astype(str)
    df['registration_channel_id_f3'] = channel_id.str[:3]
    df['registration_channel_id_m2'] = channel_id.str[3:5]
    df['residence_base_station_id_f2'] = station_id.str[:2]
    df['residence_base_station_id_remain'] = station_id.str[2:]
    df['residence_cell_id_f3'] = cell_id.str[:3]
    df['residence_cell_id_m3'] = cell_id.str[3:6]
    df['residence_cell_id_remain'] = cell_id.str[6:]
    df['tariff_id_f4'] = tariff_id.str[:4]
    df['tariff_id_m4'] = tariff_id.str[4:8]
    df['tariff_id_remain'] = tariff_id.str[8:]

    # Long-call users: average call duration above the 90th percentile of the
    # rows passed in.
    avg_call = df['avg_call_duration(minutes)']
    df['long_call_flag'] = (avg_call > avg_call.quantile(0.9)).astype(int)

    # Data usage: over-limit share and data volume per call.
    df['over_limit_ratio'] = df['over_limit_data(MB)'] / (df['total_data(MB)'] + 1e-6)
    # NOTE(review): this statement was truncated too ("1 if 0" ... "100 else
    # 0"). The (0, 100] band and the companion "> 100" flag (including its
    # column name) are a reconstruction - verify the thresholds.
    df['over_limit_samll'] = df['over_limit_data(MB)'].between(0, 100, inclusive='right').astype(int)
    df['over_limit_large'] = (df['over_limit_data(MB)'] > 100).astype(int)
    df['data_per_call'] = df['total_data(MB)'] / (df['monthly_call_count'] + 1e-6)

    # App-usage combinations: at least two of the three apps; education AND shopping.
    df['multi_app_flag'] = (
        (df['uses_education_app'] + df['uses_entertainment_app'] + df['uses_shopping_app']) >= 2
    ).astype(int)
    df['edu_shopping_flag'] = (df['uses_education_app'] & df['uses_shopping_app']).astype(int)

    # 3. Residence statistics over the individual time-of-day columns only.
    # total_residence_duration also matches the substring filter, so it is
    # excluded explicitly; otherwise the total would take part in its own
    # max/std.
    period_cols = [
        col for col in df.columns
        if 'residence_duration' in col and col != 'total_residence_duration'
    ]
    df['residence_std'] = df[period_cols].std(axis=1)
    # A zero total would give 0/0 or x/0; such rows get a ratio of 0 so no
    # NaN/inf reaches the scaler.
    total_residence = df['total_residence_duration'].replace(0, np.nan)
    df['residence_max_ratio'] = (df[period_cols].max(axis=1) / total_residence).fillna(0.0)

    # Tariff value for money: data and voice allowance per unit of price.
    df['data_per_price'] = df['total_data(MB)'] / (df['tariff_price(RMB)'] + 1e-6)
    df['voice_per_price'] = df['total_voice(minutes)'] / (df['tariff_price(RMB)'] + 1e-6)

    # 4. Coarse ID prefixes.
    # NOTE(review): the two channel level columns are not in the one-hot list
    # and stay as strings in the returned frame - confirm the downstream
    # scaler can convert them.
    df['registration_channel_level1'] = channel_id.str[:2]
    df['registration_channel_level2'] = channel_id.str[2:4]
    df['residence_cell_level1'] = cell_id.str[:3]
    df['tariff_type'] = tariff_id.str[:4]

    # 5. One-hot encode the categorical fields. tariff_price(RMB) and
    # total_data(MB) are encoded only after the ratio features above have
    # been computed from their numeric values.
    category_cols = ['tariff_price(RMB)', 'total_data(MB)', 'year', 'registration_channel_id_f3',
                     'registration_channel_id_m2',
                     'residence_base_station_id_f2', 'residence_base_station_id_remain',
                     'residence_cell_id_f3', 'residence_cell_id_m3', 'residence_cell_id_remain', 'tariff_id_f4',
                     'tariff_id_m4', 'tariff_id_remain',
                     'multi_app_flag', 'edu_shopping_flag', 'long_call_flag', 'tariff_type', 'residence_cell_level1']
    df = pd.get_dummies(df, columns=category_cols)

    # 6. Drop the raw date/ID columns that have been replaced by derived ones.
    return df.drop(
        ['registration_date', 'registration_channel_id', 'residence_base_station_id',
         'residence_cell_id', 'tariff_id'],
        axis=1,
    )
# Build the feature table for the combined train + test frame
df=preprocess(df)
# 4. Balance the positive and negative classes
from sklearn.neighbors import NearestNeighbors
def smote_synthetic_samples(minority_samples, n_samples, k_neighbors=5, random_state=42):
    """Generate synthetic minority-class rows by SMOTE-style interpolation.

    Each synthetic row lies on the segment between a randomly drawn minority
    row and one of its nearest minority neighbours (the row itself excluded).

    Args:
        minority_samples: 2-D array-like (rows x features) of minority rows.
        n_samples: number of synthetic rows to produce; values <= 0 give an
            empty result.
        k_neighbors: number of neighbours to choose from; capped at
            ``len(minority_samples) - 1``.
        random_state: seed for a private ``RandomState``; the global NumPy
            generator is left untouched.

    Returns:
        ``numpy.ndarray`` of shape ``(max(n_samples, 0), n_features)``.

    Raises:
        ValueError: if the input is not 2-D, if it has no rows while samples
            are requested, or if ``k_neighbors`` is smaller than 1.
    """
    minority_samples = np.asarray(minority_samples, dtype=float)
    if minority_samples.ndim != 2:
        raise ValueError("minority_samples must be a 2-D array (rows x features)")
    n_minority, n_features = minority_samples.shape

    if n_samples <= 0:
        # Keep the feature dimension so callers can still build a DataFrame.
        return np.empty((0, n_features))
    if n_minority == 0:
        raise ValueError("cannot synthesise samples from an empty minority set")
    if k_neighbors < 1:
        raise ValueError("k_neighbors must be at least 1")
    if n_minority == 1:
        # No neighbour to interpolate towards: repeat the only row.
        return np.repeat(minority_samples, n_samples, axis=0)

    rng = np.random.RandomState(random_state)
    k = min(k_neighbors, n_minority - 1)

    # Ask for k + 1 neighbours: the closest hit of a fitted point is the point
    # itself (distance 0), and interpolating towards it would only copy it.
    knn = NearestNeighbors(n_neighbors=k + 1)
    knn.fit(minority_samples)

    # Draw every base row up front and run one batched neighbour query over
    # the distinct bases instead of one query per synthetic row.
    base_idx = rng.randint(0, n_minority, size=n_samples)
    unique_bases, inverse = np.unique(base_idx, return_inverse=True)
    neighbours = knn.kneighbors(minority_samples[unique_bases], return_distance=False)[:, 1:]
    chosen = neighbours[inverse, rng.randint(0, k, size=n_samples)]

    # One interpolation weight in [0, 1) per synthetic row.
    alpha = rng.random_sample((n_samples, 1))
    base = minority_samples[base_idx]
    return base + alpha * (minority_samples[chosen] - base)
def balance_by_smote(df, target_col='标签', k_neighbors=5, random_state=42):
    """Oversample every smaller class up to the size of the largest class.

    Synthetic rows come from ``smote_synthetic_samples``. All original rows
    are kept; classes that already match the largest class get no synthetic
    rows. The class distribution of the result is printed.

    Args:
        df: DataFrame whose columns other than ``target_col`` are numeric
            features.
        target_col: name of the label column.
        k_neighbors: neighbour count forwarded to the sampler.
        random_state: seed forwarded to the sampler.

    Returns:
        A new DataFrame with a fresh 0..n-1 index, ordered as: largest class,
        then each other class followed by its synthetic rows. Rows whose
        label is NaN are not part of ``value_counts`` and are therefore not
        carried over.
    """
    class_counts = df[target_col].value_counts()
    if class_counts.empty:
        # Nothing to balance.
        return df.reset_index(drop=True)

    majority_label = class_counts.idxmax()
    target_size = class_counts.max()
    feature_cols = df.columns.drop(target_col)

    parts = [df[df[target_col] == majority_label]]
    for label, count in class_counts.items():
        if label == majority_label:
            continue
        members = df[df[target_col] == label]
        parts.append(members)

        shortfall = int(target_size - count)
        if shortfall <= 0:
            # Tied with the largest class: keep the rows, add nothing.
            continue

        synthetic_features = smote_synthetic_samples(
            members[feature_cols].values,
            shortfall,
            k_neighbors,
            random_state
        )
        synthetic_df = pd.DataFrame(synthetic_features, columns=feature_cols)
        synthetic_df[target_col] = label
        parts.append(synthetic_df)

    balanced_df = pd.concat(parts, ignore_index=True)
    print(f"SMOTE 后的数据类别分布:")
    print(balanced_df[target_col].value_counts(normalize=True))
    return balanced_df
# Standardise the features: the scaler is fitted on the training rows only and
# then applied unchanged to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(df[df['data']=='train'].drop(['data','user_id'],axis=1))
X_testA = scaler.transform(df[df['data']=='test'].drop(['data','user_id'],axis=1))
X_testA = pd.DataFrame(X_testA)

# Class balancing. The labels are attached by position (to_numpy), so a row
# count mismatch raises immediately instead of silently producing NaN labels
# through index alignment.
data_smoth_before = pd.DataFrame(X_train)
data_smoth_before['is_positive'] = df_train['is_positive'].to_numpy()
data_somth_after = balance_by_smote(data_smoth_before, target_col='is_positive')

# Shuffle the balanced rows; seeded so repeated runs give the same order.
from sklearn.utils import shuffle
data_somth_after = shuffle(data_somth_after, random_state=42)
y = data_somth_after['is_positive']
X = data_somth_after.drop(['is_positive'], axis=1)

# Feature selection: fit a small LightGBM model and keep the 600 features with
# the highest importance.
temp_lgb = lgb.LGBMClassifier(random_state=42, n_estimators=100)
temp_lgb.fit(X, y)
feat_importance = pd.Series(temp_lgb.feature_importances_, index=X.columns)
top_feat = feat_importance.sort_values(ascending=False).head(600).index
X = X[top_feat]
X_testA = X_testA[top_feat]  # same columns, same order as the training set

# Give both frames string column labels so the models see identical feature
# names at fit time and at predict time.
X.columns = X.columns.astype(str)
X_testA.columns = X_testA.columns.astype(str)

# Seeded, stratified 80/20 hold-out split.
# NOTE(review): the re-appended rows and the SMOTE rows are created before
# this split, so the hold-out part can contain copies or interpolations of
# training rows; treat the hold-out score as optimistic.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
# Modelling and prediction
import catboost as cb
from sklearn.ensemble import VotingClassifier

# Base learners. Logistic_Regression is defined but is not part of the vote.
Logistic_Regression = LogisticRegression(max_iter=1000, random_state=42)
# n_jobs=-1 only parallelises tree building; the seed still fixes the forest.
Random_Forest = RandomForestClassifier(random_state=42, n_jobs=-1)
Gradient_Boosting_model = GradientBoostingClassifier(random_state=42)
# use_label_encoder is no longer an XGBoost parameter, so it is not passed.
XGBoost_model = xgb.XGBClassifier(random_state=42, eval_metric='logloss')
LightGBM_model = lgb.LGBMClassifier(random_state=42)
CatBoost_model = cb.CatBoostClassifier(random_state=42, verbose=0)

# Soft voting: average the predicted class probabilities of the five models.
voting_clf = VotingClassifier(estimators=[('RF', Random_Forest), ('GB', Gradient_Boosting_model),
                                          ('XGB', XGBoost_model), ('LGBM', LightGBM_model),
                                          ('cb', CatBoost_model)], voting='soft')

# Hold-out evaluation: fit on the 80 % split, score on the remaining 20 %.
voting_clf.fit(X_train, y_train)
y_final_pred = voting_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_final_pred)
precision = precision_score(y_test, y_final_pred, average='weighted')
recall = recall_score(y_test, y_final_pred, average='weighted')
f1 = f1_score(y_test, y_final_pred, average='weighted')
print('final_accuracy:', accuracy, 'final_f1:', f1, 'final_score:', 0.3 * f1 + 0.7 * accuracy)

# Refit on every labelled row before predicting the test set, so the submitted
# model also learns from the hold-out part. This is a second full training
# pass of the ensemble.
voting_clf.fit(X, y)
y_testA_pred = voting_clf.predict(X_testA)
print(y_testA_pred)

# Build the submission in its own frame (the feature frame stays untouched).
# Rows of X_testA are in the same order as the test rows of df, so user_id is
# attached by position.
submission = pd.DataFrame({
    'user_id': df[df['data']=='test']['user_id'].values,
    'is_positive': Series(y_testA_pred).astype(int).values,
})
submission.to_csv('submitA.csv', index=False, encoding='utf-8')
注意只能使用他们比赛环境中自带的库,不允许安装其他第三方库:
它的库只有以下:
Package Version
------------------------------ ------------
absl-py 2.0.0
alembic 1.12.0
altair 5.1.2
anyio 4.0.0
argon2-cffi 23.1.0
argon2-cffi-bindings 21.2.0
arrow 1.3.0
asttokens 2.4.0
astunparse 1.6.3
async-generator 1.10
async-lru 2.0.4
attrs 23.1.0
Babel 2.13.0
backcall 0.2.0
backports.functools-lru-cache 1.6.5
beautifulsoup4 4.12.2
bleach 6.1.0
blinker 1.6.3
bokeh 3.3.0
boltons 23.0.0
Bottleneck 1.3.7
Brotli 1.1.0
cached-property 1.5.2
cachetools 5.3.1
catboost 1.2.8
certifi 2023.7.22
certipy 0.1.3
cffi 1.16.0
charset-normalizer 3.3.0
click 8.1.7
cloudpickle 3.0.0
colorama 0.4.6
comm 0.1.4
conda 23.9.0
conda-package-handling 2.2.0
conda_package_streaming 0.9.0
contourpy 1.1.1
cryptography 41.0.4
cycler 0.12.1
Cython 3.0.4
cytoolz 0.12.2
dask 2023.10.0
debugpy 1.8.0
decorator 5.1.1
defusedxml 0.7.1
dill 0.3.7
distributed 2023.10.0
entrypoints 0.4
et-xmlfile 1.1.0
exceptiongroup 1.1.3
executing 1.2.0
fastjsonschema 2.18.1
filelock 3.13.1
flatbuffers 23.5.26
fonttools 4.43.1
fqdn 1.5.1
fsspec 2023.9.2
gast 0.5.4
gitdb 4.0.10
GitPython 3.1.40
gmpy2 2.1.2
google-auth 2.23.3
google-auth-oauthlib 1.0.0
google-pasta 0.2.0
graphviz 0.21
greenlet 3.0.0
grpcio 1.59.0
h5py 3.10.0
idna 3.4
imagecodecs 2023.9.18
imageio 2.31.5
importlib-metadata 6.8.0
importlib-resources 6.1.0
ipykernel 6.25.2
ipympl 0.9.3
ipython 8.16.1
ipython-genutils 0.2.0
ipywidgets 8.1.1
isoduration 20.11.0
jedi 0.19.1
Jinja2 3.1.2
joblib 1.3.2
json5 0.9.14
jsonpatch 1.33
jsonpointer 2.4
jsonschema 4.19.1
jsonschema-specifications 2023.7.1
jupyter_client 8.4.0
jupyter_core 5.4.0
jupyter-events 0.8.0
jupyter-lsp 2.2.0
jupyter_server 2.8.0
jupyter-server-mathjax 0.2.6
jupyter_server_terminals 0.4.4
jupyter-telemetry 0.1.0
jupyterhub 4.0.2
jupyterlab 4.0.7
jupyterlab-language-pack-zh-CN 4.4.post0
jupyterlab-pygments 0.2.2
jupyterlab_server 2.25.0
jupyterlab-widgets 3.0.9
keras 2.14.0
kiwisolver 1.4.5
lazy_loader 0.3
libclang 16.0.6
libmambapy 1.5.2
lightgbm 4.6.0
llvmlite 0.40.1
locket 1.0.0
lz4 4.3.2
Mako 1.2.4
mamba 1.5.2
Markdown 3.5
MarkupSafe 2.1.3
matplotlib 3.8.0
matplotlib-inline 0.1.6
mistune 3.0.1
ml-dtypes 0.2.0
mpmath 1.3.0
msgpack 1.0.6
munkres 1.1.4
narwhals 2.5.0
nbclient 0.8.0
nbconvert 7.9.2
nbdime 3.2.1
nbformat 5.9.2
nest-asyncio 1.5.8
networkx 3.2
notebook 7.0.6
notebook_shim 0.2.3
numba 0.57.1
numexpr 2.8.7
numpy 1.24.4
nvidia-nccl-cu12 2.28.3
oauthlib 3.2.2
openpyxl 3.1.2
opt-einsum 3.3.0
overrides 7.4.0
packaging 23.2
pamela 1.1.0
pandas 2.1.1
pandocfilters 1.5.0
parso 0.8.3
partd 1.4.1
patsy 0.5.3
pexpect 4.8.0
pickleshare 0.7.5
Pillow 10.1.0
pip 23.3
pkgutil_resolve_name 1.3.10
platformdirs 3.11.0
plotly 6.3.0
pluggy 1.3.0
prometheus-client 0.17.1
prompt-toolkit 3.0.39
protobuf 4.24.3
psutil 5.9.5
ptyprocess 0.7.0
pure-eval 0.2.2
py-cpuinfo 9.0.0
pyarrow 13.0.0
pyasn1 0.5.0
pyasn1-modules 0.3.0
pycosat 0.6.6
pycparser 2.21
pycurl 7.45.1
Pygments 2.16.1
PyJWT 2.8.0
pyOpenSSL 23.2.0
pyparsing 3.1.1
PySocks 1.7.1
python-dateutil 2.8.2
python-json-logger 2.0.7
pytz 2023.3.post1
PyWavelets 1.4.1
PyYAML 6.0.1
pyzmq 25.1.1
referencing 0.30.2
requests 2.31.0
requests-oauthlib 1.3.1
rfc3339-validator 0.1.4
rfc3986-validator 0.1.1
rpds-py 0.10.6
rsa 4.9
ruamel.yaml 0.17.39
ruamel.yaml.clib 0.2.7
scikit-image 0.22.0
scikit-learn 1.3.1
scipy 1.11.3
seaborn 0.13.0
Send2Trash 1.8.2
setuptools 68.2.2
six 1.16.0
smmap 3.0.5
sniffio 1.3.0
sortedcontainers 2.4.0
soupsieve 2.5
SQLAlchemy 2.0.22
stack-data 0.6.2
statsmodels 0.14.0
sympy 1.13.3
tables 3.9.1
tblib 2.0.0
tensorboard 2.14.1
tensorboard-data-server 0.7.1
tensorflow 2.14.0
tensorflow-estimator 2.14.0
tensorflow-io-gcs-filesystem 0.34.0
termcolor 2.3.0
terminado 0.17.1
threadpoolctl 3.2.0
tifffile 2023.9.26
tinycss2 1.2.1
tomli 2.0.1
toolz 0.12.0
torch 2.7.1+cpu
tornado 6.3.3
tqdm 4.66.1
traitlets 5.11.2
truststore 0.8.0
types-python-dateutil 2.8.19.14
typing_extensions 4.12.2
typing-utils 0.1.0
tzdata 2023.3
uri-template 1.3.0
urllib3 2.0.7
wcwidth 0.2.8
webcolors 1.13
webencodings 0.5.1
websocket-client 1.6.4
Werkzeug 3.0.0
wheel 0.41.2
widgetsnbextension 4.0.9
wrapt 1.14.1
xgboost 3.0.5
xlrd 2.0.1
xyzservices 2023.10.0
zict 3.0.0
zipp 3.17.0
zstandard 0.21.0
注意!一定要保证运行完你给的代码比我原版的代码跑出来的分数要高!谢谢你!