数据归一化


除了边界比较明显的数据集(例如图像像素值)以外,一般使用均值方差归一化。

测试数据集要用训练数据集的均值和标准差进行归一化,而不是用测试数据集自身的均值和标准差。

import numpy as np
import matplotlib.pyplot as plt
最值归一化(min-max normalization)
# Draw 100 random integers from the half-open range [0, 100) as the 1-D
# sample vector to normalize; the bare name echoes it as the cell output.
x = np.random.randint(low=0, high=100, size=100)
x
array([84, 5, 7, 97, 16, 15, 64, 71, 55, 58, 12, 0, 73, 41, 27, 92, 97,
21, 29, 69, 46, 7, 70, 68, 61, 59, 65, 2, 70, 30, 34, 45, 86, 29,
17, 21, 41, 50, 5, 51, 3, 27, 68, 25, 53, 76, 15, 9, 16, 63, 62,
65, 39, 78, 76, 82, 83, 67, 51, 6, 32, 30, 99, 56, 65, 80, 31, 12,
4, 33, 54, 95, 63, 87, 62, 55, 86, 27, 84, 96, 35, 54, 64, 88, 8,
36, 99, 27, 50, 53, 95, 56, 20, 70, 15, 70, 27, 40, 4, 54])
# Min-max scaling: subtract the minimum and divide by the range (max - min),
# so the smallest value maps to 0 and the largest to 1.
(x - x.min()) / (x.max() - x.min())
array([0.84848485, 0.05050505, 0.07070707, 0.97979798, 0.16161616,
0.15151515, 0.64646465, 0.71717172, 0.55555556, 0.58585859,
0.12121212, 0. , 0.73737374, 0.41414141, 0.27272727,
0.92929293, 0.97979798, 0.21212121, 0.29292929, 0.6969697 ,
0.46464646, 0.07070707, 0.70707071, 0.68686869, 0.61616162,
0.5959596 , 0.65656566, 0.02020202, 0.70707071, 0.3030303 ,
0.34343434, 0.45454545, 0.86868687, 0.29292929, 0.17171717,
0.21212121, 0.41414141, 0.50505051, 0.05050505, 0.51515152,
0.03030303, 0.27272727, 0.68686869, 0.25252525, 0.53535354,
0.76767677, 0.15151515, 0.09090909, 0.16161616, 0.63636364,
0.62626263, 0.65656566, 0.39393939, 0.78787879, 0.76767677,
0.82828283, 0.83838384, 0.67676768, 0.51515152, 0.06060606,
0.32323232, 0.3030303 , 1. , 0.56565657, 0.65656566,
0.80808081, 0.31313131, 0.12121212, 0.04040404, 0.33333333,
0.54545455, 0.95959596, 0.63636364, 0.87878788, 0.62626263,
0.55555556, 0.86868687, 0.27272727, 0.84848485, 0.96969697,
0.35353535, 0.54545455, 0.64646465, 0.88888889, 0.08080808,
0.36363636, 1. , 0.27272727, 0.50505051, 0.53535354,
0.95959596, 0.56565657, 0.2020202 , 0.70707071, 0.15151515,
0.70707071, 0.27272727, 0.4040404 , 0.04040404, 0.54545455])
# Build a 50x2 matrix of random integers in [0, 100), then echo its first
# ten rows as the cell output.
X = np.random.randint(low=0, high=100, size=(50, 2))
X[:10]
array([[55, 33],
[51, 53],
[40, 14],
[10, 24],
[90, 36],
[76, 34],
[45, 48],
[86, 89],
[88, 68],
[ 4, 39]])
# Convert the integer matrix to a floating-point copy so that scaled values
# written back into its columns are not truncated to integers.
X = X.astype(float)
X[:10]
array([[55., 33.],
[51., 53.],
[40., 14.],
[10., 24.],
[90., 36.],
[76., 34.],
[45., 48.],
[86., 89.],
[88., 68.],
[ 4., 39.]])
X[:,0]=(X[:,0]-np.min(X[:,0])