数据归一化处理
最值归一化:把所有数据映射到0-1之间
均值方差归一化:把所有数据归一到均值为0方差为1的分布中
适用于:数据分布没有明显的边界
import numpy as np
import matplotlib.pyplot as plt
最值归一化 Normalization
# Sample vector for the min-max demo: 100 random integers drawn from [0, 100).
x = np.random.randint(low=0, high=100, size=100)
x
array([75, 41, 95, 58, 85, 71, 43, 31, 99, 46, 74, 97, 77, 5, 63, 82, 24,
30, 49, 25, 25, 56, 7, 11, 22, 59, 78, 3, 51, 92, 26, 80, 92, 40,
41, 5, 52, 97, 27, 39, 25, 0, 2, 62, 49, 99, 25, 57, 44, 70, 88,
50, 72, 19, 88, 99, 80, 29, 72, 90, 44, 71, 35, 71, 1, 59, 31, 35,
94, 93, 96, 84, 76, 48, 63, 13, 67, 70, 9, 86, 96, 23, 58, 76, 84,
72, 54, 81, 59, 85, 37, 77, 51, 85, 56, 64, 57, 81, 21, 97])
# Min-max normalization of the vector: shift by the minimum and divide by the
# value range, so the smallest element maps to 0 and the largest to 1.
(x - x.min()) / (x.max() - x.min())
array([0.75757576, 0.41414141, 0.95959596, 0.58585859, 0.85858586,
0.71717172, 0.43434343, 0.31313131, 1. , 0.46464646,
0.74747475, 0.97979798, 0.77777778, 0.05050505, 0.63636364,
0.82828283, 0.24242424, 0.3030303 , 0.49494949, 0.25252525,
0.25252525, 0.56565657, 0.07070707, 0.11111111, 0.22222222,
0.5959596 , 0.78787879, 0.03030303, 0.51515152, 0.92929293,
0.26262626, 0.80808081, 0.92929293, 0.4040404 , 0.41414141,
0.05050505, 0.52525253, 0.97979798, 0.27272727, 0.39393939,
0.25252525, 0. , 0.02020202, 0.62626263, 0.49494949,
1. , 0.25252525, 0.57575758, 0.44444444, 0.70707071,
0.88888889, 0.50505051, 0.72727273, 0.19191919, 0.88888889,
1. , 0.80808081, 0.29292929, 0.72727273, 0.90909091,
0.44444444, 0.71717172, 0.35353535, 0.71717172, 0.01010101,
0.5959596 , 0.31313131, 0.35353535, 0.94949495, 0.93939394,
0.96969697, 0.84848485, 0.76767677, 0.48484848, 0.63636364,
0.13131313, 0.67676768, 0.70707071, 0.09090909, 0.86868687,
0.96969697, 0.23232323, 0.58585859, 0.76767677, 0.84848485,
0.72727273, 0.54545455, 0.81818182, 0.5959596 , 0.85858586,
0.37373737, 0.77777778, 0.51515152, 0.85858586, 0.56565657,
0.64646465, 0.57575758, 0.81818182, 0.21212121, 0.97979798])
# 50 samples x 2 features of random integers from [0, 100), converted to float
# so that fractional values can be written back into the array in place.
X = np.random.randint(0, 100, size=(50, 2)).astype(float)
# Preview the first 10 rows.
X[:10]
array([[29., 45.],
[41., 63.],
[73., 92.],
[96., 9.],
[97., 45.],
[43., 35.],
[75., 85.],
[80., 27.],
[20., 71.],
[ 6., 9.]])
# Min-max normalization, feature by feature: every column of X is rescaled
# independently to [0, 1] using its own minimum and maximum. Reducing along
# axis=0 handles all columns at once (via broadcasting) instead of repeating
# the formula for each column index.
# NOTE: a column whose values are all equal has a zero range and yields NaN.
col_min = X.min(axis=0)
col_max = X.max(axis=0)
# Assign through X[:] so the existing array is updated in place.
X[:] = (X - col_min) / (col_max - col_min)
# Preview the first 10 normalized rows.
X[:10, :]
array([[0.29166667, 0.48913043],
[0.41666667, 0.68478261],
[0.75 , 1. ],
[0.98958333, 0.09782609],
[1. , 0.48913043],
[0.4375 , 0.38043478],
[0.77083333, 0.92391304],
[0.82291667, 0.29347826],
[0.19791667, 0.77173913],
[0.05208333, 0.09782609]])
# Scatter plot of the two columns of X: first column on the horizontal axis,
# second column on the vertical axis.
plt.scatter(x=X[:, 0], y=X[:, 1])
plt.show()
# Mean of the first column of X.
X[:, 0].mean()
0.5639583333333333
# Standard deviation (population, ddof=0) of the first column of X.
X[:, 0].std()
0.2831801822603411
# Mean of the second column of X.
X[:, 1].mean()
0.49934782608695655
# Standard deviation (population, ddof=0) of the second column of X.
X[:, 1].std()
0.31420685094310735
均值方差归一化 Standardization
# Fresh 50 x 2 matrix of random integers from [0, 100) for the standardization
# demo, converted to float so the scaled values can be stored in place.
X2 = np.random.randint(0, 100, size=(50, 2)).astype(float)
# Preview the first 10 rows.
X2[:10]
array([[88., 40.],
[36., 15.],
[ 8., 97.],
[63., 58.],
[ 2., 15.],
[94., 64.],
[70., 9.],
[65., 20.],
[85., 72.],
[76., 27.]])
# Standardization (z-score), feature by feature: subtract each column's mean
# and divide by its standard deviation (population std, ddof=0), so every
# column ends up with mean ~0 and std ~1. Reducing along axis=0 covers all
# columns at once instead of repeating the formula for each column index.
# NOTE: a column whose values are all equal has zero std and yields NaN.
# Assign through X2[:] so the existing array is updated in place.
X2[:] = (X2 - X2.mean(axis=0)) / X2.std(axis=0)
# Scatter plot of the two columns of X2: first column on the horizontal axis,
# second column on the vertical axis.
plt.scatter(x=X2[:, 0], y=X2[:, 1])
plt.show()
# Mean of the first column of X2.
X2[:, 0].mean()
4.107825191113079e-17
# Standard deviation (population, ddof=0) of the first column of X2.
X2[:, 0].std()
0.9999999999999999
# Mean of the second column of X2.
X2[:, 1].mean()
1.4876988529977099e-16
# Standard deviation (population, ddof=0) of the second column of X2.
X2[:, 1].std()
1.0