Code Implementation of Bayesian Optimization for Logistic Regression

import pandas as pd
from functools import partial
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from hyperopt import hp, fmin, tpe, Trials
from hyperopt.early_stop import no_progress_loss
from matplotlib import pyplot as plt
titanic_table = pd.read_csv('titanic_features_table.csv')
# Feature columns keep their original Chinese names from the CSV: age, number of
# siblings/spouses aboard, number of parents/children aboard, fare, 1st/2nd/3rd
# class, embarkation-port indicator flags, female, male. The target column
# "是否生还" is whether the passenger survived.
X = titanic_table[['年龄', '兄弟姐妹/配偶的个数', '父母/小孩个数', '票价', '1等舱', '2等舱', '3等舱', '登船港口为空',
                   '登船为S港', '登船为Q港', '登船为nan港', '女性', '男性']]
y = titanic_table['是否生还']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=65)
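A quick look at the split (an illustrative addition, not part of the original post) confirms the shapes and the class balance before any tuning starts:

# Shapes of the train/test splits and the survival rate in each.
print(X_train.shape, X_test.shape)
print(y_train.mean(), y_test.mean())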
# Every successful configuration is recorded here, keyed by its test accuracy,
# so the best run can be looked up with max() afterwards.
best_params = {}
param_grid_hp = {
    # LogisticRegression only accepts certain penalty/solver combinations, so a
    # separate solver choice is sampled for each penalty and the objective picks
    # the compatible one at evaluation time.
    'penalty': hp.choice('penalty', ['l1', 'l2', 'elasticnet', 'none']),
    'dual': hp.choice('dual', [True, False]),
    'fit_intercept': hp.choice('fit_intercept', [True, False]),
    'solver-0': hp.choice('solver-0', ['newton-cg', 'lbfgs', 'sag', 'saga']),  # valid for l2 / none
    'solver-1': hp.choice('solver-1', ['liblinear', 'saga']),                  # valid for l1
    'solver-2': hp.choice('solver-2', ['saga']),                               # valid for elasticnet
    'tol': hp.uniform('tol', 0, 1),
    'C': hp.uniform('C', 0, 1),
    'intercept_scaling': hp.uniform('intercept_scaling', 0, 1),
    'max_iter': hp.uniformint('max_iter', 100, 1000),
    'l1_ratio': hp.uniform('l1_ratio', 0, 1),
    'random_state': hp.randint('random_state', 100)
}
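As a quick sanity check (an illustrative addition, not part of the original post), hyperopt can draw one random configuration from this space, which makes the conditional solver encoding easy to inspect:

from hyperopt.pyll import stochastic
# Print a single random draw from the search space defined above.
print(stochastic.sample(param_grid_hp))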
def hyperopt_objective(hyperopt_params):
    # Pick the solver that is compatible with the sampled penalty.
    if hyperopt_params['penalty'] == 'l2' or hyperopt_params['penalty'] == 'none':
        solver_params = hyperopt_params['solver-0']
    elif hyperopt_params['penalty'] == 'l1':
        solver_params = hyperopt_params['solver-1']
    else:  # 'elasticnet'
        solver_params = hyperopt_params['solver-2']
    # intercept_scaling only matters for liblinear with an intercept.
    if solver_params == 'liblinear' and hyperopt_params['fit_intercept']:
        intercept_scaling_params = hyperopt_params['intercept_scaling']
    else:
        intercept_scaling_params = 1
    # dual=True is only valid for liblinear with an l2 penalty.
    if solver_params == 'liblinear' and hyperopt_params['penalty'] == 'l2':
        dual_params = hyperopt_params['dual']
    else:
        dual_params = False
    # l1_ratio is only meaningful for the elasticnet penalty.
    if hyperopt_params['penalty'] == 'elasticnet':
        l1_ratio_params = hyperopt_params['l1_ratio']
    else:
        l1_ratio_params = None
    try:
        clf = LogisticRegression(
            penalty=hyperopt_params['penalty'],
            dual=dual_params,
            fit_intercept=hyperopt_params['fit_intercept'],
            solver=solver_params,
            tol=hyperopt_params['tol'],
            C=hyperopt_params['C'],
            intercept_scaling=intercept_scaling_params,
            max_iter=hyperopt_params['max_iter'],
            l1_ratio=l1_ratio_params,
            random_state=hyperopt_params['random_state']
        )
        clf.fit(X_train, y_train)
        predict_y = clf.predict(X_test)
        acc = (predict_y == y_test).sum() / y_test.size
        best_params[acc] = {
            'penalty': hyperopt_params['penalty'],
            'dual': dual_params,
            'fit_intercept': hyperopt_params['fit_intercept'],
            'solver': solver_params,
            'tol': hyperopt_params['tol'],
            'C': hyperopt_params['C'],
            'intercept_scaling': intercept_scaling_params,
            'max_iter': hyperopt_params['max_iter'],
            'l1_ratio': l1_ratio_params,
            'random_state': hyperopt_params['random_state']
        }
    except Exception:
        # Invalid parameter combinations raise; score them as accuracy 0.
        acc = 0
    # fmin minimizes the objective, so return the negative accuracy.
    return -acc
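Since the objective returns a negative accuracy, a single manual call is a cheap way to confirm the sign convention before launching the full search (an illustrative addition, not from the original post):

# One random configuration should yield a loss in [-1, 0].
print("one-off loss:", hyperopt_objective(stochastic.sample(param_grid_hp)))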
def param_hyperopt(max_evals=100):
    # Record the optimization history.
    trials = Trials()
    # Early stopping: halt if the loss has not improved after this many iterations.
    early_stop_fn = no_progress_loss(400)
    # Surrogate model; TPE's startup/candidate counts can be tuned via partial:
    # algo = partial(tpe.suggest, n_startup_jobs=20, n_EI_candidates=50)
    params_best = fmin(hyperopt_objective        # objective function
                       , space=param_grid_hp     # search space
                       , algo=tpe.suggest        # surrogate model
                       # , algo=algo
                       , max_evals=max_evals     # maximum number of iterations
                       , verbose=True
                       , trials=trials
                       , early_stop_fn=early_stop_fn
                       )
    # Print the best parameters; fmin itself reports the best score as it runs.
    print("\n", "best params: ", params_best, "\n")
    return params_best, trials
NUM_EVALS = 800
params_best, trials = param_hyperopt(NUM_EVALS)
# Inspect the full record of one search trial.
print(f"record of the first trial: {trials.trials[0]}")
# The keys of best_params are test accuracies, so max() retrieves the best run.
print(f"best parameters: {best_params[max(best_params)]}\nscore: {max(best_params)}")