3.1 非线性函数的构造
- 任意曲线都可以用激活函数来构造
- 任意曲线都是一些激活函数的和
- 这些激活函数 偏置和权重不同
- 需要两个ReLU函数(偏置不同、相减)才能拼出一个近似阶跃函数或sigmoid函数的形状
- 所以要达到同样的拟合效果,使用ReLU作为激活函数时所需的神经元数量大约要增加一倍
# 激活函数
# 阶跃函数的实现
import numpy as np
def step_func(x):
    """Scalar step function.

    Parameters
    ----------
    x : a single number (this version does not accept lists or arrays).

    Returns
    -------
    1 if ``x`` is strictly positive, otherwise 0.
    """
    # A plain ``else`` keeps the function total: the previous ``elif x <= 0``
    # fell through and returned None for values such as NaN.
    return 1 if x > 0 else 0
# Quick check with a scalar argument
step_func(11)
x = np.array([11,11])
# x > 0 on a NumPy array is evaluated element-wise
# step_func only handles scalars; passing a plain list fails:
# step_func([11,11])
# TypeError: '>' not supported between instances of 'list' and 'int'
# Improved version built on NumPy arrays
import numpy as np
import matplotlib.pylab as plt
def step_function(x):
    """Element-wise step function.

    Parameters
    ----------
    x : scalar, sequence or NumPy array of numbers.

    Returns
    -------
    Integer array (or NumPy integer scalar for scalar input) holding 1 where
    ``x > 0`` and 0 elsewhere.
    """
    # np.asarray lets plain Python scalars and lists through as well.
    is_positive = np.asarray(x) > 0  # boolean mask
    # The builtin ``int`` replaces the ``np.int`` alias, which was removed in
    # NumPy 1.24 and raises AttributeError there.
    return is_positive.astype(int)
# Sample the step function on [-5, 5) in steps of 0.1
x = np.arange(-5.0,5.0,0.1)
y = step_function(x)
# Visualise; the y-range is padded slightly so the 0 and 1 levels stay visible
plt.plot(x,y)
plt.ylim(-0.1,1.1)
plt.show()
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), evaluated with NumPy."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
# Plot the sigmoid on [-5, 5) in steps of 0.1, using the same padded y-range
# as the step-function plot
x = np.arange(-5,5,0.1)
y = sigmoid(x)
plt.plot(x,y)
plt.ylim(-0.1,1.1)
plt.show()
# ReLU 线性整流函数
def ReLU(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    floor = 0
    return np.maximum(floor, x)
# Quick check with a scalar, then plot ReLU on [-5, 5) in steps of 0.1
ReLU(2)
x = np.arange(-5,5,0.1)
y = ReLU(x)
plt.plot(x,y)
plt.show()
3.2 多维数组
import numpy as np
# Nested lists: 2 blocks, each with 3 rows of 2 values
A= np.array([[[1,2],[3,4],[5,6]],[[2,2],[3,3],[4,4]]])
A.shape # the shape is read from the outermost axis inwards: (2, 3, 2)
3.3 神经网络的实现
# Matrix multiplication
X = np.array([1,2])
W = np.array([[1,3,5],[2,4,6]]) # row 0: weights applied to x1, row 1: weights applied to x2
Y = np.dot(X,W)
print(Y)
# Hidden layer 1: 2 inputs -> 3 units
X = np.array([1.0,0.5])
W1 = np.array([[0.1,0.3,0.5],[0.2,0.4,0.6]])
B1 = np.array([0.1,0.2,0.3])
print(X.shape)
print(W1.shape)
print(B1.shape)
A1 = np.dot(X,W1)+B1
Z1 = sigmoid(A1)
print(A1)
print(Z1)
# Hidden layer 2: 3 units -> 2 units
W2 = np.array([[0.1,0.4],[0.2,0.5],[0.3,0.6]])
B2 = np.array([0.1,0.2])
print(Z1.shape)
print(W2.shape)
print(B2.shape)
A2 = np.dot(Z1,W2) + B2
Z2 = sigmoid(A2)
# Output layer: 2 units -> 2 outputs (activation applied further below)
W3 = np.array([[0.1,0.3],[0.2,0.4]])
B3 = np.array([0.1,0.2])
A3 = np.dot(Z2,W3)+B3
# Output-layer activation functions
# Use the identity function for regression, sigmoid for (binary) classification, and softmax for multi-class classification
def identity_func(x):
    """Output-layer activation that hands its input back unchanged."""
    result = x
    return result
def _softmax(x):
exp_x = np.exp(x)
sum_exp = np.sum(exp_x)
y = exp_x /sum_exp
return y
# softmax需要解决 某个指数,数据过大问题
def softmax(x):
    """Numerically stable softmax along the last axis.

    Parameters
    ----------
    x : scalar, 1-D score vector, or array whose last axis holds the scores
        of one sample (e.g. a ``(batch, classes)`` matrix).

    Returns
    -------
    Array of the same shape as ``x`` whose entries along the last axis are
    non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Reduce per sample (last axis). Without an axis, a 2-D batch would be
    # normalised over the whole batch instead of row by row.
    axis = -1 if x.ndim > 0 else None
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing on large scores.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)
# Finish the forward pass above with the identity activation
Y = identity_func(A3)
print(Y)
# Compare the naive and the shifted softmax on large scores; the naive
# version is expected to overflow in np.exp for inputs of this size
x = np.array([1010,1000,990])
y1 = _softmax(x)
y2 = softmax(x)
print(y1)
print(y2)
面向对象形式
class network:
    """Three-layer feed-forward network with caller-supplied parameters.

    The first two layers apply an affine transform followed by ``sigmoid``;
    the output layer applies an affine transform followed by
    ``identity_func``.
    """

    def __init__(self, W1, b1, W2, b2, W3, b3):
        """Store the weights (W1, W2, W3) and biases (b1, b2, b3) per layer."""
        self.W1 = W1
        self.W2 = W2
        self.W3 = W3
        self.b1 = b1
        self.b2 = b2
        self.b3 = b3

    def forward(self, x):
        """Propagate ``x`` through the three layers and return the output."""
        a1 = np.dot(x, self.W1) + self.b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, self.W2) + self.b2
        z2 = sigmoid(a2)
        a3 = np.dot(z2, self.W3) + self.b3
        y = identity_func(a3)
        return y
# Same input and parameters as the step-by-step forward pass above
x = np.array([1.0,0.5])
W1 = np.array([[0.1,0.3,0.5],[0.2,0.4,0.6]])
b1 = np.array([0.1,0.2,0.3])
W2 = np.array([[0.1,0.4],[0.2,0.5],[0.3,0.6]])
b2 = np.array([0.1,0.2])
W3 = np.array([[0.1,0.3],[0.2,0.4]])
b3 = np.array([0.1,0.2])
# NOTE(review): this rebinds the name `network` from the class to an instance,
# so the class cannot be instantiated again after this line - consider a
# different variable name.
network = network(W1,b1,W2,b2,W3,b3)
output = network.forward(x)
print(output)
3.4 手写数字识别
-------------------------------补充资料-----------------------------------------------------
# mnist.py文件
try:
import urllib.request
except ImportError:
raise ImportError('You should use Python 3.x')
import os.path
import gzip
import pickle
import os
import numpy as np
# Base URL the archives are downloaded from
# NOTE(review): confirm this host still serves the files
url_base = 'http://yann.lecun.com/exdb/mnist/'
# Logical dataset part -> archive file name (appended to url_base / dataset_dir)
key_file = {
'train_img': 'train-images-idx3-ubyte.gz',
'train_label': 'train-labels-idx1-ubyte.gz',
'test_img': 't10k-images-idx3-ubyte.gz',
'test_label': 't10k-labels-idx1-ubyte.gz'
}
# Archives and the pickle cache live next to this module
dataset_dir = os.path.dirname(os.path.abspath(__file__))
save_file = dataset_dir + "/mnist.pkl"
# train_num, test_num and img_dim are not referenced by the visible code
train_num = 60000
test_num = 10000
img_dim = (1, 28, 28)
# Number of values per flattened image; used by _load_img when reshaping
img_size = 784
def _download(file_name):
    """Fetch ``file_name`` from ``url_base`` into ``dataset_dir``.

    Does nothing when the target file already exists.
    """
    target = dataset_dir + "/" + file_name
    if not os.path.exists(target):
        print("Downloading " + file_name + " ... ")
        urllib.request.urlretrieve(url_base + file_name, target)
        print("Done")
def download_mnist():
    """Download every archive listed in ``key_file`` (skipping existing ones)."""
    for archive_name in key_file.values():
        _download(archive_name)
def _load_label(file_name):
    """Read a gzipped label archive from ``dataset_dir`` as a uint8 array."""
    archive_path = dataset_dir + "/" + file_name
    print("Converting " + file_name + " to NumPy Array ...")
    with gzip.open(archive_path, 'rb') as archive:
        raw = archive.read()
        # Skip the first 8 bytes (presumably the file header - confirm
        # against the file format).
        labels = np.frombuffer(raw, np.uint8, offset=8)
    print("Done")
    return labels
def _load_img(file_name):
    """Read a gzipped image archive as a uint8 array with ``img_size`` columns."""
    archive_path = dataset_dir + "/" + file_name
    print("Converting " + file_name + " to NumPy Array ...")
    with gzip.open(archive_path, 'rb') as archive:
        raw = archive.read()
        # Skip the first 16 bytes (presumably the file header - confirm
        # against the file format).
        pixels = np.frombuffer(raw, np.uint8, offset=16)
    # One row per image, img_size values per row.
    images = pixels.reshape(-1, img_size)
    print("Done")
    return images
def _convert_numpy():
    """Load all four archives and return them in a dict keyed like ``key_file``."""
    # Entries are evaluated top to bottom, so files are converted in the
    # order train images, train labels, test images, test labels.
    return {
        'train_img': _load_img(key_file['train_img']),
        'train_label': _load_label(key_file['train_label']),
        'test_img': _load_img(key_file['test_img']),
        'test_label': _load_label(key_file['test_label']),
    }
def init_mnist():
    """Download the archives, convert them, and cache the result in ``save_file``."""
    download_mnist()
    converted = _convert_numpy()
    print("Creating pickle file ...")
    with open(save_file, 'wb') as cache:
        # HIGHEST_PROTOCOL is what a negative protocol number selects.
        pickle.dump(converted, cache, pickle.HIGHEST_PROTOCOL)
    print("Done!")
def _change_one_hot_label(X):
T = np.zeros((X.size, 10))
for idx, row in enumerate(T):
row[X[idx]] = 1
return T
def load_mnist(normalize=True, flatten=True, one_hot_label=False):
    """Load the MNIST dataset from the pickle cache, creating it if needed.

    Parameters
    ----------
    normalize : when True, cast the images to float32 and divide by 255.0
        so pixel values lie in 0.0~1.0.
    flatten : when False, reshape the images to (-1, 1, 28, 28); when True,
        keep them as stored in the cache.
    one_hot_label : when True, return the labels as one-hot arrays such as
        [0,0,1,0,0,0,0,0,0,0].

    Returns
    -------
    (train images, train labels), (test images, test labels)
    """
    if not os.path.exists(save_file):
        init_mnist()

    # The cache is written by init_mnist(); pickle must only be used on
    # files from a trusted source.
    with open(save_file, 'rb') as cache:
        dataset = pickle.load(cache)

    image_keys = ('train_img', 'test_img')
    label_keys = ('train_label', 'test_label')

    if normalize:
        for key in image_keys:
            dataset[key] = dataset[key].astype(np.float32) / 255.0

    if one_hot_label:
        for key in label_keys:
            dataset[key] = _change_one_hot_label(dataset[key])

    if not flatten:
        for key in image_keys:
            dataset[key] = dataset[key].reshape(-1, 1, 28, 28)

    train_pair = (dataset['train_img'], dataset['train_label'])
    test_pair = (dataset['test_img'], dataset['test_label'])
    return train_pair, test_pair
if __name__ == '__main__':
    # Running the module directly builds the pickle cache.
    init_mnist()
-----------------------------------------------正文--------------------------------------------
import sys,os
sys.path.append(os.pardir)
from res.mnist import load_mnist
from PIL import Image
# (x_train, t_train),(x_test,t_test) = load_mnist(flatten=True,normalize=False)
# print(x_train.shape)
# print(t_train.shape)
# print(x_test.shape)
# print(t_test.shape)
# 展示第一张照片
def img_show(img):
    """Display ``img`` after converting it to uint8 and wrapping it in a PIL image."""
    Image.fromarray(np.uint8(img)).show()
# Load flattened, un-normalised images
(x_train, t_train),(x_test,t_test) = load_mnist(flatten=True,normalize=False)
# pickle can save runtime objects to a file so they can be reused directly next time
# load_mnist relies on pickle for its cache
img = x_train[0]
label = t_train[0]
print(label)
print(img.shape)
# Restore the 28x28 layout before displaying the flattened image
img = img.reshape(28,28)
print(img.shape)
img_show(img)
# 神经网络预测
# 1*28*28维度的图片展平后对应784个输入神经元(28*28=784),每个像素的原始取值范围是0~255
# 输入784个神经元,输出0-9的10个神经元
# 假设隐藏层1有50个神经元,隐藏层2有100个神经元
'''1、获取输入数据
2、定义神经网络
3、利用神经网络预测 '''
def get_data():
    """Return the normalised, flattened test images and their integer labels."""
    _, test_set = load_mnist(
        normalize=True, flatten=True, one_hot_label=False)
    x_test, t_test = test_set
    return x_test, t_test
def _network():
    """Load and return the object pickled in 'sample_weight.pkl'.

    The path is relative to the current working directory.
    """
    import pickle
    with open('sample_weight.pkl', 'rb') as weight_file:
        return pickle.load(weight_file)
def predict(network, x):
    """Run ``x`` through the three-layer network and return softmax outputs.

    ``network`` is a mapping with keys 'W1', 'W2', 'W3', 'b1', 'b2', 'b3'.
    """
    W1, b1 = network['W1'], network['b1']
    W2, b2 = network['W2'], network['b2']
    W3, b3 = network['W3'], network['b3']

    hidden1 = sigmoid(np.dot(x, W1) + b1)
    hidden2 = sigmoid(np.dot(hidden1, W2) + b2)
    scores = np.dot(hidden2, W3) + b3
    return softmax(scores)
# Classify the test images one at a time and count the correct predictions
x, t = get_data()
network = _network()
accuracy_cnt = 0
for i, sample in enumerate(x):
    predicted = np.argmax(predict(network, sample))  # index of the largest output
    if predicted == t[i]:
        accuracy_cnt += 1
print("Accuracy:" + str(float(accuracy_cnt)/len(x)))
# Shapes of the input data and of the network parameters
print(x.shape)
print(x[0].shape)
# NOTE(review): W1 is only rebound to the loaded weights on the next
# statement, so this prints the shape of whatever W1 was bound to earlier in
# the module - confirm whether this line should come after the assignment.
print(W1.shape)
W1,W2,W3 = network['W1'], network['W2'], network['W3']
print(W1.shape)
print(W2.shape)
print(W3.shape)
# The weight matrices only fix the number of input columns, so a stack of 100
# images passes through predict() just like a single image does.
# Processing in batches improves computational performance.
x, t = get_data()
network = _network()
batch_size = 100
accuracy_cnt = 0
for start in range(0, len(x), batch_size):  # start = 0, 100, 200, ...
    stop = start + batch_size
    y_batch = predict(network, x[start:stop])  # one slice replaces an inner per-sample loop
    predicted = np.argmax(y_batch, axis=1)  # best class for each row of the batch
    accuracy_cnt += np.sum(predicted == t[start:stop])
print("Accuracy:" + str(float(accuracy_cnt/len(x))))