class Feature_Discretization(object):
def __init__(self):
self.min_interval = 1 # 最小间隔
self.min_epos = 0.05 # 信息增益阈值
self.final_bin = [] # 最终边界
def fit(self, x, y, min_interval=1):
self.min_interval = min_interval
# x = np.floor(x)
# x = np.int32(x)
min_val = np.min(x)
bin_dict = {}
bin_li = []
for i in range(len(x)):
pos = (x[i] - min_val) / min_interval * min_interval + min_val
target = y[i]
bin_dict.setdefault(pos, [0, 0])
if target == 1:
bin_dict[pos][0] += 1
else:
bin_dict[pos][1] += 1 # 标签one-hot的操作
for key, val in bin_dict.iteritems():
t = [float(key)]
t.extend(val)
bin_li.append(t)
bin_li.sort(cmp=None, key&