1. LightGBM 优化
1.1 对低 Remaining 范围(0–30)的样本赋予更高权重
# Per-sample training weights: rows whose 'mRNA_remaining_pct' lies in the
# closed interval [0, 30] get weight 2, every other row (including NaN) gets 1.
# Built with a vectorized range mask instead of a per-row Python lambda.
weight_ls = np.where(feats['mRNA_remaining_pct'].between(0, 30), 2, 1)
1.2 使用官方评价指标作为自定义评估函数(feval)
def calculate_metrics(preds, data, threshold=30):
y_pred = preds
y_true = data.get_label()
mae = np.mean(np.abs(y_true - y_pred))
y_true_binary = ((y_true <= threshold) & (y_true >= 0)).astype(int)
y_pred_binary = ((y_pred <= threshold) & (y_pred >= 0)).astype(int)
mask = (y_pred >= 0) & (y_pred <= threshold)
range_mae = (
mean_absolute_error(y_true[mask], y_pred[mask]) if np.sum(mask) > 0 else 100
)
if np.sum(y_pred_binary) > 0:
precision = (np.array(y_pred_binary) & y_true_binary).sum()/np.sum(y_pred_binary)
else:
precision = 0
if np.sum(y_true_binary) > 0:
recall = (np.array(y_pred_binary) & y_true_binary).sum