1. Custom loss function
The three inputs provided:
approxes: the raw model outputs (scores)
targets: the labels
weights: the sample weights
The two outputs required for each sample:
der1: first derivative of the objective with respect to the approx
der2: second derivative of the objective with respect to the approx
import math


class LoglossObjective(object):
    def calc_ders_range(self, approxes, targets, weights):
        # approxes, targets, weights are indexed containers of floats
        # (containers with only __len__ and __getitem__ defined).
        # weights parameter can be None.
        # Returns list of pairs (der1, der2)
        assert len(approxes) == len(targets)
        if weights is not None:
            assert len(weights) == len(approxes)

        exponents = []
        for index in range(len(approxes)):
            exponents.append(math.exp(approxes[index]))

        result = []
        for index in range(len(targets)):
            p = exponents[index] / (1 + exponents[index])
            der1 = (1 - p) if targets[index] > 0.0 else -p
            der2 = -p * (1 - p)

            if weights is not None:
                der1 *= weights[index]
                der2 *= weights[index]

            result.append((der1, der2))

        return result
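In this example der1 and der2 are the first and second derivatives of the per-sample log-likelihood t*a - log(1 + exp(a)) with respect to the raw score a, i.e. der1 = t - p and der2 = -p(1 - p) with p = sigmoid(a). The snippet below is a minimal sketch that checks this against finite differences; the helpers loglik and check are made up here purely for illustration.

import math

def loglik(a, t):
    # Per-sample log-likelihood matching the derivatives above: t*a - log(1 + exp(a)).
    return t * a - math.log(1.0 + math.exp(a))

def check(a, t, eps=1e-4):
    # Compare analytic der1/der2 with central finite differences of loglik.
    der1, der2 = LoglossObjective().calc_ders_range([a], [t], None)[0]
    num1 = (loglik(a + eps, t) - loglik(a - eps, t)) / (2 * eps)
    num2 = (loglik(a + eps, t) - 2 * loglik(a, t) + loglik(a - eps, t)) / eps ** 2
    print(der1, num1, der2, num2)

check(0.3, 1.0)   # der1 ~ 0.4256, der2 ~ -0.2445
check(-1.2, 0.0)  # der1 ~ -0.2315, der2 ~ -0.1779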
2. Custom eval_metric
The methods get_final_error, is_max_optimal, and evaluate below must all be defined.
The three inputs provided:
approxes: the raw model outputs (scores)
targets: the labels
weights: the sample weights
Output: error_sum, the accumulated metric value (e.g. F1, accuracy, ...); see the accuracy sketch after the Logloss example below.
import math


class LoglossMetric(object):
    def get_final_error(self, error, weight):
        # You can define the relationship between error_sum and
        # the weight sum yourself here.
        return error / (weight + 1e-38)

    def is_max_optimal(self):
        return True

    def evaluate(self, approxes, target, weight):
        # approxes is list of indexed containers
        # (containers with only __len__ and __getitem__ defined), one container
        # per approx dimension. Each container contains floats.
        # weight is one dimensional indexed container.
        # target is one dimensional indexed container.
        # weight parameter can be None.
        # Returns pair (error, weights sum)
        assert len(approxes) == 1
        assert len(target) == len(approxes[0])
        approx = approxes[0]

        error_sum = 0.0
        weight_sum = 0.0

        for i in range(len(approx)):
            w = 1.0 if weight is None else weight[i]
            weight_sum += w
            error_sum += w * (target[i] * approx[i] - math.log(1 + math.exp(approx[i])))

        return error_sum, weight_sum
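As noted above, error_sum can accumulate any metric, not just logloss. Below is a minimal sketch of a weighted accuracy metric with the same interface; the class name AccuracyMetric and the decision rule (raw score > 0, i.e. probability 0.5) are my own illustrative choices, not part of the CatBoost API.

class AccuracyMetric(object):
    def get_final_error(self, error, weight):
        # Weighted fraction of correct predictions.
        return error / (weight + 1e-38)

    def is_max_optimal(self):
        # Higher accuracy is better.
        return True

    def evaluate(self, approxes, target, weight):
        assert len(approxes) == 1
        assert len(target) == len(approxes[0])
        approx = approxes[0]

        error_sum = 0.0
        weight_sum = 0.0

        for i in range(len(approx)):
            w = 1.0 if weight is None else weight[i]
            weight_sum += w
            pred = 1.0 if approx[i] > 0.0 else 0.0
            error_sum += w * (1.0 if pred == target[i] else 0.0)

        return error_sum, weight_sum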
3. Plugging it into CatBoost
from catboost import CatBoostClassifier

model = CatBoostClassifier(loss_function=LoglossObjective(),
                           eval_metric=LoglossMetric())
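A minimal end-to-end sketch of training with the custom objective and metric. The random toy data, the iterations/learning_rate values, and the manual sigmoid at the end are all illustrative choices, not requirements.

import numpy as np
from catboost import CatBoostClassifier

# Hypothetical toy data just to show the call pattern.
rng = np.random.RandomState(0)
X = rng.rand(200, 5)
y = (X[:, 0] + 0.1 * rng.randn(200) > 0.5).astype(int)

model = CatBoostClassifier(iterations=100,
                           learning_rate=0.1,
                           loss_function=LoglossObjective(),
                           eval_metric=LoglossMetric())
model.fit(X, y, verbose=False)

# Request raw scores and apply the sigmoid yourself; this is the safe way to
# get probabilities when the objective is a custom Python object.
raw = model.predict(X, prediction_type='RawFormulaVal')
proba = 1.0 / (1.0 + np.exp(-raw))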