ReLU activation function

import numpy as np
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 5))

x = np.arange(-10, 10, 0.1)
# Sigmoid: y = 1 / (1 + e^(-x))
s = 1.0 / (1 + np.exp(0. - x))
# ReLU: y = max(0, x)
y = np.clip(x, a_min=0., a_max=None)

# Left panel: Sigmoid
f = plt.subplot(121)
plt.plot(x, s, color='r')
currentAxis = plt.gca()
plt.text(-9.0, 0.9, r'$y=Sigmoid(x)$', fontsize=13)
currentAxis.xaxis.set_label_text('x', fontsize=15)
currentAxis.yaxis.set_label_text('y', fontsize=15)

# Right panel: ReLU
f = plt.subplot(122)
plt.plot(x, y, color='g')
plt.text(-3.0, 9, r'$y=ReLU(x)$', fontsize=13)
currentAxis = plt.gca()
currentAxis.xaxis.set_label_text('x', fontsize=15)
currentAxis.yaxis.set_label_text('y', fontsize=15)

plt.show()
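For reference, the two functions plotted above are

$$\mathrm{Sigmoid}(x) = \frac{1}{1 + e^{-x}}, \qquad \mathrm{ReLU}(x) = \max(0, x)$$

The Sigmoid curve flattens out for large |x|, so its gradient shrinks toward zero there, while ReLU keeps a constant gradient of 1 for every positive input; this is the usual motivation for preferring ReLU as the activation in deep networks.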
Batch Normalization
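As a quick reference before the examples, batch normalization standardizes the input with the statistics of the current mini-batch and then applies a learnable scale and shift. Using the usual notation, where γ and β are trainable parameters and ε is a small constant for numerical stability (1e-5 by default in Paddle's BatchNorm layers):

$$\mu_B = \frac{1}{m}\sum_{i=1}^{m} x_i, \qquad \sigma_B^2 = \frac{1}{m}\sum_{i=1}^{m} (x_i - \mu_B)^2, \qquad \hat{x}_i = \frac{x_i - \mu_B}{\sqrt{\sigma_B^2 + \epsilon}}, \qquad y_i = \gamma \hat{x}_i + \beta$$

For an input of shape [N, K] the statistics are computed per feature over the N samples; for an input of shape [N, C, H, W] they are computed per channel over the N, H and W dimensions. The two examples below illustrate exactly these two cases.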
Example 1: when the input data has shape [N, K], which typically corresponds to the output of a fully connected layer
import numpy as np
import paddle
from paddle.nn import BatchNorm1D

# Input of shape [N, K]: 3 samples, 3 features each
data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]).astype('float32')

# Normalize each of the 3 features over the batch dimension
bn = BatchNorm1D(num_features=3)
x = paddle.to_tensor(data)
y = bn(x)
print('output of BatchNorm1D Layer: \n {}'.format(y.numpy()))

# Verify the result by hand for the first feature (column 0)
a = np.array([1, 4, 7])
a_mean = a.mean()
a_std = a.std()
b = (a - a_mean) / a_std
print('mean {}, std {}, \n output {}'.format(a_mean, a_std, b))
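The hand computation above only covers column 0 and ignores ε. To check all three features at once, a minimal NumPy sketch can reproduce the layer's output, assuming the layer's scale γ is still at its initial value 1, its shift β at 0, and ε = 1e-5 (Paddle's default); the result should match BatchNorm1D up to floating point error:

import numpy as np

eps = 1e-5  # assumed to match BatchNorm1D's default epsilon
data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]).astype('float32')

# Per-feature statistics over the batch dimension (axis 0)
mean = data.mean(axis=0)
var = data.var(axis=0)

# Normalized output; with freshly initialized gamma=1 and beta=0,
# this is what the BatchNorm1D layer computes in training mode
out = (data - mean) / np.sqrt(var + eps)
print(out)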
Example 2: when the input data has shape [N, C, H, W], which typically corresponds to the output of a convolutional layer
import numpy as np
import paddle
from paddle.nn import BatchNorm2D

np.random.seed(100)
# Input of shape [N, C, H, W]: 2 samples, 3 channels, 3x3 feature maps
data = np.random.rand(2, 3, 3, 3).astype('float32')

# Normalize each of the 3 channels over the N, H and W dimensions
bn = BatchNorm2D(num_features=3)
x = paddle.to_tensor(data)
y = bn(x)
print('input of BatchNorm2D Layer: \n {}'.format(x.numpy()))
print('output of BatchNorm2D Layer: \n {}'.format(y.numpy()))

# Verify the result by hand for channel 0
a = data[:, 0, :, :]
a_mean = a.mean()
a_std = a.std()
b = (a - a_mean) / a_std
print('channel 0 of input data: \n {}'.format(a))
print('mean {}, std {}, \n output: \n {}'.format(a_mean, a_std, b))
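One practical detail worth keeping in mind: the outputs above are computed in training mode, where the layer normalizes with the statistics of the current batch and updates its running mean and variance. After switching the layer to evaluation mode with .eval(), it normalizes with the accumulated running statistics instead, so the same input generally produces a different output. A minimal sketch of that contrast:

import numpy as np
import paddle
from paddle.nn import BatchNorm2D

np.random.seed(100)
data = np.random.rand(2, 3, 3, 3).astype('float32')
x = paddle.to_tensor(data)

bn = BatchNorm2D(num_features=3)

# Training mode (default): normalize with the statistics of this batch
y_train = bn(x)

# Evaluation mode: normalize with the running statistics accumulated so far
bn.eval()
y_eval = bn(x)

print('train-mode output:\n {}'.format(y_train.numpy()))
print('eval-mode output:\n {}'.format(y_eval.numpy()))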
Dropout
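paddle.nn.Dropout supports two conventions for keeping the expected activation scale consistent between training and inference, selected by the mode argument: 'downscale_in_infer' drops elements at training time and multiplies the whole input by (1 - p) at inference time, while 'upscale_in_train' drops elements and divides the survivors by (1 - p) at training time, then passes the input through unchanged at inference time. A rough NumPy sketch of the two conventions (the keep mask here is drawn independently of the framework, so concrete values will differ from the Paddle example that follows):

import numpy as np

np.random.seed(0)
p = 0.5
x = np.arange(1, 13, dtype='float32').reshape(-1, 3)
mask = (np.random.rand(*x.shape) >= p).astype('float32')  # 1 = keep, 0 = drop

# mode='downscale_in_infer'
train_down = x * mask            # training: drop elements, no rescaling
infer_down = x * (1 - p)         # inference: scale everything down by (1 - p)

# mode='upscale_in_train'
train_up = x * mask / (1 - p)    # training: drop elements and scale the rest up
infer_up = x                     # inference: pass the input through unchanged

print(train_down, infer_down, train_up, infer_up, sep='\n')

The example below exercises both modes through paddle.nn.Dropout itself, in both training and evaluation mode.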
import paddle
import numpy as np

np.random.seed(100)

# Two inputs: a random 4-D tensor and a simple 2-D tensor with known values
data1 = np.random.rand(2, 3, 3, 3).astype('float32')
data2 = np.arange(1, 13).reshape([-1, 3]).astype('float32')

# Apply both dropout modes to data1
x1 = paddle.to_tensor(data1)
# mode='downscale_in_infer': drop at train time, scale by (1 - p) at eval time
drop11 = paddle.nn.Dropout(p=0.5, mode='downscale_in_infer')
droped_train11 = drop11(x1)
drop11.eval()  # switch to evaluation mode
droped_eval11 = drop11(x1)
# mode='upscale_in_train': drop and rescale at train time, pass through at eval time
drop12 = paddle.nn.Dropout(p=0.5, mode='upscale_in_train')
droped_train12 = drop12(x1)
drop12.eval()
droped_eval12 = drop12(x1)

# Apply both dropout modes to data2
x2 = paddle.to_tensor(data2)
drop21 = paddle.nn.Dropout(p=0.5, mode='downscale_in_infer')
droped_train21 = drop21(x2)
drop21.eval()
droped_eval21 = drop21(x2)
drop22 = paddle.nn.Dropout(p=0.5, mode='upscale_in_train')
droped_train22 = drop22(x2)
drop22.eval()
droped_eval22 = drop22(x2)

print('x1 {}, \n droped_train11 \n {}, \n droped_eval11 \n {}'.format(data1, droped_train11.numpy(), droped_eval11.numpy()))
print('x1 {}, \n droped_train12 \n {}, \n droped_eval12 \n {}'.format(data1, droped_train12.numpy(), droped_eval12.numpy()))
print('x2 {}, \n droped_train21 \n {}, \n droped_eval21 \n {}'.format(data2, droped_train21.numpy(), droped_eval21.numpy()))
print('x2 {}, \n droped_train22 \n {}, \n droped_eval22 \n {}'.format(data2, droped_train22.numpy(), droped_eval22.numpy()))