import math

import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


class Adaboost():
    """Boosting method that uses a number of weak classifiers in
    ensemble to make a strong classifier. This implementation uses decision
    stumps, which are one-level Decision Trees.

    Parameters:
    n_clf: int
        The number of weak classifiers that will be used.
    """
    def __init__(self, n_clf=5):
        self.n_clf = n_clf
    def fit(self, X, y):
        n_samples, n_features = np.shape(X)

        # Initialize weights to 1/N
        w = np.full(n_samples, (1 / n_samples))

        self.clfs = []
        # Iterate through classifiers
        for _ in range(self.n_clf):
            clf = DecisionStump()
            # Minimum error given for using a certain feature value threshold
            # for predicting sample label
            min_error = float('inf')
            # Iterate through every unique feature value and see what value
            # makes the best threshold for predicting y
            for feature_i in range(n_features):
                feature_values = np.expand_dims(X[:, feature_i], axis=1)
                unique_values = np.unique(feature_values)
                # Try every unique feature value as threshold
                for threshold in unique_values:
                    p = 1
                    # Set all predictions to '1' initially
                    prediction = np.ones(np.shape(y))
                    # Label the samples whose values are below threshold as '-1'
                    prediction[X[:, feature_i] < threshold] = -1
                    # Error = sum of weights of misclassified samples
                    error = sum(w[y != prediction])

                    # If the error is over 50% we flip the polarity so that samples that
                    # were classified as -1 are classified as 1, and vice versa
                    # E.g. error = 0.8 => (1 - error) = 0.2
                    if error > 0.5:
                        error = 1 - error
                        p = -1

                    # If this threshold resulted in the smallest error we save the
                    # configuration
                    if error < min_error:
                        clf.polarity = p
                        clf.threshold = threshold
                        clf.feature_index = feature_i
                        min_error = error

            # Calculate the alpha which is used to update the sample weights,
            # Alpha is also an approximation of this classifier's proficiency
            clf.alpha = 0.5 * math.log((1.0 - min_error) / (min_error + 1e-10))
            # Set all predictions to '1' initially
            predictions = np.ones(np.shape(y))
            # The indexes where the sample values are below threshold
            negative_idx = (clf.polarity * X[:, clf.feature_index] < clf.polarity * clf.threshold)
            # Label those as '-1'
            predictions[negative_idx] = -1
            # Calculate new weights
            # Misclassified samples get larger weights and correctly classified samples smaller
            w *= np.exp(-clf.alpha * y * predictions)
            # Normalize to one
            w /= np.sum(w)

            # Save classifier
            self.clfs.append(clf)
    def predict(self, X):
        n_samples = np.shape(X)[0]
        y_pred = np.zeros((n_samples, 1))
        # For each classifier => label the samples
        for clf in self.clfs:
            # Set all predictions to '1' initially
            predictions = np.ones(np.shape(y_pred))
            # The indexes where the sample values are below threshold
            negative_idx = (clf.polarity * X[:, clf.feature_index] < clf.polarity * clf.threshold)
            # Label those as '-1'
            predictions[negative_idx] = -1
            # Add predictions weighted by the classifier's alpha
            # (alpha indicative of classifier's proficiency)
            y_pred += clf.alpha * predictions

        # Return sign of prediction sum
        y_pred = np.sign(y_pred).flatten()

        return y_pred
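

# Adaboost.fit() above expects a DecisionStump weak learner. The class below is
# a minimal sketch inferred from how fit() and predict() use it: it only needs
# to act as a container for the polarity, feature_index, threshold and alpha
# values that fit() assigns and predict() reads.
class DecisionStump():
    def __init__(self):
        # Determines whether a sample below the threshold is labeled -1 or 1
        self.polarity = 1
        # The index of the feature used to make the classification
        self.feature_index = None
        # The threshold value that the feature is measured against
        self.threshold = None
        # Weight of this stump's vote (indicative of its accuracy)
        self.alpha = None
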
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    digit1 = 1
    digit2 = 8
    # Keep only the samples of the two chosen digits (binary classification)
    idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0])
    y = data.target[idx]
    # Change labels to {-1, 1}
    y[y == digit1] = -1
    y[y == digit2] = 1
    X = data.data[idx]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification with 5 weak classifiers
    clf = Adaboost(n_clf=5)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
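

# Run the demo only when this file is executed as a script
if __name__ == "__main__":
    main()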