# Machine learning homework, uploaded here for final-exam review.
import numpy as np
from sklearn import datasets
# Load the breast-cancer dataset bundled with scikit-learn and pull out the
# feature matrix and the 0/1 class targets.
breast_cancer_data = datasets.load_breast_cancer()
features = breast_cancer_data.data
targets = breast_cancer_data.target

# Keep the features as floating point. The dataset's measurements are
# real-valued (see scikit-learn's dataset description), so casting them to
# an integer dtype would silently drop every fractional part.
data_total = np.array(features, dtype=np.float64)
label_total = np.array(targets, dtype=np.int64)

# The classifier later in this script works with labels in {-1, +1}, so
# remap class 0 to -1 in one vectorized assignment.
label_total[label_total == 0] = -1

# Fixed split: the first 500 samples are used for training, the remainder
# for testing.
data_train = data_total[0:500]
label_train = label_total[0:500]
data_test = data_total[500:]
label_test = label_total[500:]
# Perceptron parameters: one weight per feature, stored as a 1 x n row
# vector, plus a scalar bias. The feature count is read from the training
# data instead of being hard-coded.
w = np.zeros([1, data_train.shape[1]], dtype=np.float32)
b = 0
eta = 1  # learning rate; scales every weight and bias correction
iteration = 0  # number of completed passes over the training set

# Sweep the training set repeatedly until one full pass produces no
# mistakes. That is only guaranteed to happen for linearly separable data,
# so the number of passes is capped to make sure the loop always ends.
error_data = True
while error_data:
    error_data = False
    for sample, label in zip(data_train, label_train):
        # Functional margin of this sample; a value <= 0 means it is
        # misclassified or lies exactly on the decision boundary.
        judge = label * (np.dot(sample, w.T) + b)
        if judge <= 0:
            error_data = True
            # Perceptron rule: move the hyperplane toward the sample.
            w = w + eta * label * sample
            b = b + eta * label
    iteration = iteration + 1
    if iteration > 500000:
        break
print('Iteration:%d w:%s b:%s' % (iteration, w, b))
# Measure the misclassification rate on the held-out samples: predict +1
# when the linear score is strictly positive and -1 otherwise, then report
# the fraction of predictions that disagree with the true label.
num = len(label_test)
error = 0.0
for sample, expected in zip(data_test, label_test):
    predicted = 1 if np.dot(sample, w.T) + b > 0 else -1
    if predicted != expected:
        error += 1
print('错误率为 %f' % (error / num))