机器学习-KNN
KNN入门-了解KNN
# --- KNN intro: two point clouds ("apple" vs "pear") in a 2-D feature space ---
import numpy as np
import matplotlib as mpl
mpl.rcParams['font.sans-serif'] = ['simHei']   # Chinese-capable font for plot labels
mpl.rcParams['axes.unicode_minus'] = False     # render minus signs correctly
import matplotlib.pyplot as plt

np.random.seed(28)  # fixed seed -> reproducible sample points

# Draw 20 samples per class; the draw order is kept so the RNG stream matches.
x_apple = np.random.normal(loc=3, scale=2, size=20).reshape(-1, 1)
y_apple = np.random.normal(loc=8, scale=2, size=20).reshape(-1, 1)
x_pear = np.random.normal(loc=20, scale=3, size=20).reshape(-1, 1)
y_pear = np.random.normal(loc=15, scale=3, size=20).reshape(-1, 1)

# Visualise the two classes.
plt.figure()
plt.scatter(x_apple, y_apple, label='苹果')
plt.scatter(x_pear, y_pear, label='梨')
plt.legend(loc='upper left')
plt.xlabel('形状')
plt.ylabel('颜色')
plt.show()

# Stack each class as [x, y, label] rows (label 0 = apple, 1 = pear).
X_apple = np.concatenate([x_apple, y_apple, np.zeros((20, 1))], axis=1)
X_pear = np.concatenate([x_pear, y_pear, np.ones((20, 1))], axis=1)
data = np.concatenate([X_apple, X_pear])

# The query point we want to classify.
target = np.asarray([15, 12])
plt.scatter(15, 12)
plt.show()
def getDistance(points=None, query=None):
    """Return an (n, 2) array of [euclidean distance to query, class label].

    Backward compatible: called with no arguments it falls back to the
    module-level ``data`` (rows of [x, y, label]) and ``target`` ([x, y]),
    exactly like the original version -- but the result size is no longer
    hard-coded to 40 rows, and the distance is computed vectorised instead
    of row by row.

    Parameters
    ----------
    points : array-like of shape (n, 3), optional
        Rows of [x, y, label]; defaults to the global ``data``.
    query : array-like of shape (2,), optional
        The [x, y] point to measure distances from; defaults to ``target``.
    """
    if points is None:
        points = data      # module-level sample matrix from the setup section
    if query is None:
        query = target     # module-level query point from the setup section
    points = np.asarray(points, dtype=float)
    query = np.asarray(query, dtype=float)
    # Euclidean distance of every row's (x, y) to the query, in one pass.
    distances = np.sqrt((points[:, 0] - query[0]) ** 2
                        + (points[:, 1] - query[1]) ** 2)
    return np.column_stack([distances, points[:, 2]])
# Classify the query point with a hand-rolled k-NN majority vote (k = 5).
return_list = getDistance()
sorted_list = return_list[np.argsort(return_list[:, 0])]  # nearest first

k = 5
nearest_labels = sorted_list[:k, 1]
apple = int(np.sum(nearest_labels == 0))   # votes for class 0 (apple)
pear = int(np.sum(nearest_labels == 1))    # votes for class 1 (pear)

print('apple:{}---pear:{}'.format(apple, pear))
if apple > pear:
    print('这是apple')
elif apple < pear:
    print('这是pear')
else:
    print('error')
通过已知的点和KNN算法来画KD树
# --- KD-tree demo: nine fixed 2-D points, plotted before partitioning ---
import numpy as np
import pandas as pd
import matplotlib as mpl
mpl.rcParams['font.sans-serif'] = ['simHei']   # Chinese-capable font
mpl.rcParams['axes.unicode_minus'] = False
import matplotlib.pyplot as plt

# Kept as a DataFrame so later code can sort by either coordinate by name.
points = [[7, 2], [5, 4], [9, 6], [2, 3], [4, 7], [8, 1], [2, 6], [8, 9], [1, 5]]
data = pd.DataFrame(np.array(points), columns=['x', 'y'])

plt.figure()
plt.scatter(data['x'].values, data['y'].values)
def sort_data(data, is_x):
    """Return *data* sorted by 'x' (is_x=True) or by 'y' (is_x=False).

    The index is reset to 0..n-1 so the caller can pick the median row by
    integer position with ``.loc``. A sorted copy is returned; the caller's
    frame is not reordered in place.
    """
    # One code path instead of two branches that differed only in the column.
    data = data.sort_values(by='x' if is_x else 'y')
    data.reset_index(drop=True, inplace=True)
    return data
def draw_line(line_num, space_start, space_end, is_x):
    """Draw one KD-tree split segment on the current axes.

    Vertical at x=line_num when is_x is True, otherwise horizontal at
    y=line_num; the segment spans [space_start, space_end] on the other axis.
    """
    xs = [line_num, line_num] if is_x else [space_start, space_end]
    ys = [space_start, space_end] if is_x else [line_num, line_num]
    plt.plot(xs, ys)
def draw_kd_tree(data, is_x, space_start_x, space_start_y, space_end_x, space_end_y):
    """Recursively draw the KD-tree partition of *data* inside the rectangle
    (space_start_x, space_start_y)-(space_end_x, space_end_y).

    At each level the points are sorted along the current axis, the median
    point defines the split line, and the two halves recurse with the axis
    flipped. The duplicated x/y branches of the original are folded into one
    code path; the draw order (split line, then left/lower half, then
    right/upper half) is unchanged.
    """
    if data['x'].count() <= 0:  # empty partition: nothing left to draw
        return
    axis = 'x' if is_x else 'y'
    data = sort_data(data, is_x)
    mid_index = int(data[axis].count() / 2)   # median row by position
    line_num = data.loc[mid_index, axis]
    if is_x:
        draw_line(line_num, space_start_y, space_end_y, True)
    else:
        draw_line(line_num, space_start_x, space_end_x, False)
    left_data = data.loc[0:mid_index - 1, :]    # rows strictly before the median
    right_data = data.loc[mid_index + 1:, :]    # rows strictly after the median
    flipped = not is_x
    if is_x:
        # Split the rectangle at x = line_num.
        draw_kd_tree(left_data, flipped, space_start_x, space_start_y, line_num, space_end_y)
        draw_kd_tree(right_data, flipped, line_num, space_start_y, space_end_x, space_end_y)
    else:
        # Split the rectangle at y = line_num.
        draw_kd_tree(left_data, flipped, space_start_x, space_start_y, space_end_x, line_num)
        draw_kd_tree(right_data, flipped, space_start_x, line_num, space_end_x, space_end_y)
# Split first along the axis with the larger variance, then draw the
# full KD tree inside the 10 x 10 plotting region.
is_x = bool(data['x'].var() >= data['y'].var())
draw_kd_tree(data, is_x, 0, 0, 10, 10)
plt.show()

导入KNN的包使用KNN的API来画树
# --- Binary classification with sklearn's KNeighborsClassifier, plus a
# decision-boundary plot on a dense grid. ---
from sklearn.datasets import make_blobs
import numpy as np
import matplotlib as mpl
mpl.rcParams['font.sans-serif'] = ['simHei']
mpl.rcParams['axes.unicode_minus'] = False
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier

plt.figure()
x, y = make_blobs(n_samples=200, centers=2, random_state=28)

clf = KNeighborsClassifier()
clf.fit(x, y)

# Dense grid covering the sample area; one prediction per grid node.
x_grid, y_grid = np.meshgrid(np.arange(-10, 10, 0.02), np.arange(-6, 6, 0.02))
z = clf.predict(np.c_[x_grid.ravel(), y_grid.ravel()]).reshape(x_grid.shape)

# Background = predicted class regions, dots = training samples.
plt.pcolormesh(x_grid, y_grid, z, cmap=plt.cm.Pastel1)
plt.scatter(x[:, 0], x[:, 1], c=y, cmap=plt.cm.spring, edgecolors='k')

# Classify one new point, drawn as a red star.
plt.scatter(-2.5, -4, marker='*', s=200, c='r')
z = clf.predict([[-2.5, -4]])
print(z)
plt.show()

多分类KNN
# --- Five-class KNN: fit, report training accuracy, and plot the
# decision regions over a dense grid. ---
from sklearn.datasets import make_blobs
import numpy as np
import matplotlib as mpl
mpl.rcParams['font.sans-serif'] = ['simHei']
mpl.rcParams['axes.unicode_minus'] = False
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier

x, y = make_blobs(n_samples=500, centers=5, random_state=8)

clf = KNeighborsClassifier()
clf.fit(x, y)
print(clf.score(x, y))   # accuracy on the training set itself

# One prediction per node of a grid spanning the sample area.
x_grid, y_grid = np.meshgrid(np.arange(-10, 12, 0.02), np.arange(-15, 15, 0.02))
grid_points = np.c_[x_grid.ravel(), y_grid.ravel()]
result = clf.predict(grid_points).reshape(x_grid.shape)

plt.pcolormesh(x_grid, y_grid, result, cmap=plt.cm.Pastel1)
plt.scatter(x[:, 0], x[:, 1], c=y, cmap=plt.cm.spring, edgecolors='k')
plt.show()

使用KNN算法做回归
# --- KNN regression vs. a polynomial fit on noisy 1-D data ---
from sklearn.datasets import make_regression
from sklearn.datasets import make_blobs
import numpy as np
import matplotlib as mpl
mpl.rcParams['font.sans-serif'] = ['simHei']
mpl.rcParams['axes.unicode_minus'] = False
import matplotlib.pyplot as plt

plt.figure()
# Single-feature regression problem with heavy noise.
x, y = make_regression(n_features=1, noise=50, random_state=2)
plt.scatter(x, y)

# Evenly spaced query points used to draw both fitted curves.
z = np.linspace(-3, 3, 200).reshape(-1, 1)

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline

# Polynomial features (default degree=2) feeding a linear model; the bias
# column comes from PolynomialFeatures, so the regression has no intercept.
polyModel = Pipeline([
    ('Poly', PolynomialFeatures(include_bias=True)),
    ('Linear', LinearRegression(fit_intercept=False))
])
# NOTE: the original called polyModel.set_params() with no arguments,
# which changes nothing; the no-op call has been removed.
polyModel.fit(x, y)
plt.plot(z, polyModel.predict(z), c='g', linewidth=2)
print('多项式', polyModel.score(x, y))

from sklearn.neighbors import KNeighborsRegressor
reg = KNeighborsRegressor(n_neighbors=5)
reg.fit(x, y)
plt.plot(z, reg.predict(z), c='r', linewidth=2)
print('KNN', reg.score(x, y))
plt.show()
