activation_function_plot.py
"""
Plot activation function curves.
"""
from matplotlib import pyplot as plt
import numpy as np
command_message = "\nList of commands:" \
                  "\nend: terminate the program" \
                  "\nrelu: relu function" \
                  "\nsigmoid: sigmoid function" \
                  "\ntanh: tanh function"
print(command_message)
# ======================================================================================================================
# Plot each activation function and its derivative
if __name__ == "__main__":
function = input("Enter Activation function: ")
    # Enter 'end' to exit the loop
while function != 'end':
        # ReLU activation
if function == 'relu':
fig = plt.figure()
x = np.arange(-10, 10, 0.01)
            plt.plot(x, np.maximum(x, 0))
plt.title("y = relu(x)=max(x,0)")
plt.show()
            # Derivative plot
plt.plot(x, x > 0, ".", color='r')
plt.title("y = relu'(x)")
plt.show()
        # Sigmoid activation
elif function == 'sigmoid':
fig = plt.figure()
x = np.arange(-10, 10, 0.01)
plt.plot(x, 1 / (1 + np.exp(-x)))
plt.title("y = 1/(1+exp(-x))")
plt.show()
            # Derivative plot
plt.plot(x, np.exp(-x) / (1 + np.exp(-x)) ** 2, color='r')
plt.title("y = exp(-x)/(1+exp(-x))^2")
plt.show()
        # Tanh activation
elif function == 'tanh':
fig = plt.figure()
x = np.arange(-10, 10, 0.01)
plt.plot(x, (1 - np.exp(-2 * x)) / (1 + np.exp(-2 * x)))
plt.title("y = (1-exp(-2x))/(1+exp(-2x))")
plt.show()
            # Derivative plot
plt.plot(x, 4 * np.exp(-2 * x) / (1 + np.exp(-2 * x)) ** 2, color='r')
plt.title("y = 4exp(-2x)/(1+exp(-2x))^2")
plt.show()
else:
print("Unknown function. Please enter 'relu', 'sigmoid', 'tanh', or 'end' to exit.")
        # Prompt again
function = input("Enter Activation function: ")
    print("\nSee you later, user.")
backward.py
"""
Backward propagation.
"""
import numpy as np
from equations import softmax_backward_CE, softmax_backward_MSE, relu_backward, sigmoid_backward, tanh_backward
from prep_data import m_train, y
# Linear part of the backward pass for one layer
def backward(dZ, cache):
linear_cache, _, _ = cache[:3]
A_prev, W, b = linear_cache
dW = (1 / m_train) * np.dot(dZ, np.transpose(A_prev))
db = (1 / m_train) * np.sum(dZ, axis=1, keepdims=True)
dA_prev = np.dot(np.transpose(W), dZ)
assert (dW.shape == W.shape)
assert (db.shape == b.shape)
assert (dA_prev.shape == A_prev.shape)
return (dW, db), dA_prev
# Backward pass through the activation function, then the linear part
def linear_backward(A, cache, Y=y, cost='CE', activation='relu'):
dZ = None
_, Z, _ = cache[:3]
    # Select the activation function
    if activation == 'softmax':
        # Select the cost function
        if cost == 'CE':
dZ = softmax_backward_CE(A, Z, Y)
elif cost == 'MSE':
dZ = softmax_backward_MSE(A, Z, Y)
elif activation == 'relu':
dZ = relu_backward(A, Z)
elif activation == 'sigmoid':
dZ = sigmoid_backward(A, Z)
elif activation == 'tanh':
dZ = tanh_backward(A, Z)
gradient, dA_prev = backward(dZ, cache)
# gradient = (dW, db)
return gradient, dA_prev
# Backward pass through all L layers
def L_model_backward(AL, caches, Y_param=y, cost_function='CE', activation='relu', keep_prob=1.0):
    # Gradients are collected as (dW, db) tuples and reversed at the end, so they run from layer 1 to layer L
gradients = []
    L = len(caches) - 1  # index of the output layer's cache
    # Output layer: softmax combined with the chosen cost
gradient, dA_prev = linear_backward(AL, caches[L], Y=Y_param, cost=cost_function,
activation='softmax')
gradients.append(gradient)
dA = dA_prev
for l in range(L - 1, -1, -1):
        ### START ###
        # if keep_prob < 1.0:
        # Fill in the code for Question 3 here
        if keep_prob < 1.0:
            # Reapply the dropout mask saved in the forward pass (already scaled by 1/keep_prob)
            dA = dA * caches[l][3]
        ### END ###
gradient, dA_prev = linear_backward(dA, caches[l], activation=activation)
gradients.append(gradient)
dA = dA_prev
gradients = gradients[::-1]
return gradients
compute_cost.py
"""
Cost functions.
"""
import numpy as np
from prep_data import m_train
# Cross-entropy cost
def compute_cost_CE(AL, y):
"""
    AL: predictions
    y: ground-truth one-hot labels
    cost: scalar cost value
"""
cost = (-1 / m_train) * np.sum(y * np.log(AL + 1e-8))
    cost = np.squeeze(cost)  # collapse to a scalar
assert (cost.shape == ())
return cost
# Mean squared error cost
def compute_cost_MSE(AL, y):
cost = None
### START ###
    # Fill in the code for Question 2 here
    cost = np.mean(np.square(AL - y))
### END ###
return cost
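# A minimal sanity check of both costs on a toy batch (an illustrative sketch;
# running this file directly assumes ../填空/train.csv exists, since prep_data
# loads it at import time). Note compute_cost_CE averages over m_train, the full
# training-set size, so its value here is small by design.
if __name__ == "__main__":
    AL_demo = np.array([[0.7, 0.1],
                        [0.2, 0.8],
                        [0.1, 0.1]])
    y_demo = np.array([[1, 0],
                       [0, 1],
                       [0, 0]])
    print("CE cost:", compute_cost_CE(AL_demo, y_demo))
    print("MSE cost:", compute_cost_MSE(AL_demo, y_demo))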
equations.py
"""
Activation functions and their backward passes.
"""
import numpy as np
# Softmax activation
# Forward pass
def softmax(Z):
"""
    Z: pre-activation values from the linear transform
"""
    e_Z = np.exp(Z - np.max(Z, axis=0, keepdims=True))  # subtract the per-column max for numerical stability
A = e_Z / e_Z.sum(axis=0)
assert (A.shape == Z.shape)
cache = Z
return A, cache
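# A minimal sanity check for softmax (an illustrative sketch): each output column
# should sum to 1, and the max subtraction above should keep large logits from
# overflowing np.exp.
def _check_softmax():
    Z = np.array([[1000.0, 1.0],
                  [1000.0, 2.0],
                  [999.0, 3.0]])
    A, _ = softmax(Z)
    assert np.allclose(A.sum(axis=0), 1.0)
    assert np.all(np.isfinite(A))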
# Backward pass through softmax combined with the cross-entropy cost
def softmax_backward_CE(AL, cache, y):
"""
    AL: predictions
    cache: cached data (Z)
    y: ground-truth one-hot labels
    dZ: gradient of the cost with respect to Z
"""
Z = cache
dZ = AL - y
assert (dZ.shape == Z.shape)
return dZ
# Backward pass through softmax with the MSE cost
def softmax_backward_MSE(A, cache, y):
    ### START ###
    # Fill in the code for Question 2 here
    """
    A: output of the softmax layer
    cache: cached data, containing Z
    y: ground-truth one-hot labels
    dZ: gradient of the cost with respect to Z
    """
    Z = cache
    # Simplification: reuse the softmax + cross-entropy gradient (A - y); the exact
    # MSE gradient also needs the softmax Jacobian (see the sketch below)
    dZ = A - y
    assert (dZ.shape == Z.shape)
    ### END ###
return dZ
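# A sketch of the exact MSE-through-softmax gradient, for comparison with the
# simplified version above. Constant scale factors (the 2 and the averaging
# denominators) are treated as absorbed into the learning rate; this helper is
# an illustrative assumption, not part of the assignment interface.
def softmax_backward_MSE_exact(A, cache, y):
    Z = cache
    dA = 2 * (A - y)  # dL/dA for a squared-error cost, up to a constant
    # Apply the softmax Jacobian column-wise: dZ_i = A_i * (dA_i - sum_j A_j * dA_j)
    dZ = A * (dA - np.sum(A * dA, axis=0, keepdims=True))
    assert (dZ.shape == Z.shape)
    return dZ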
# ReLU activation
# Forward pass
def relu(Z):
A = np.maximum(0, Z)
assert (A.shape == Z.shape)
cache = Z # cache is used in backprop
return A, cache
# Backward pass
def relu_backward(dA, cache):
"""
    dA: gradient with respect to A, the activation output
    cache: cached data (Z)
    dZ: gradient with respect to Z, the linear-transform output
"""
Z = cache
dZ = np.array(dA, copy=True) # gradient is 1 for z > 0
dZ[Z <= 0] = 0 # gradient is 0 for z <= 0 otherwise 1 for rest
assert (dZ.shape == Z.shape)
return dZ
# Sigmoid activation
# Forward pass
def sigmoid(Z):
### START ###
    # Fill in the code for Question 2 here
    A = 1 / (1 + np.exp(-Z))
    cache = Z
### END ###
return A, cache
# Backward pass
def sigmoid_backward(dA, cache):
### START ###
    # Fill in the code for Question 2 here
    Z = cache
    s = 1 / (1 + np.exp(-Z))  # sigmoid(Z)
    dZ = dA * s * (1 - s)
### END ###
return dZ
# Tanh activation
# Forward pass
def tanh(Z):
### START ###
    # Fill in the code for Question 2 here
    A = np.tanh(Z)
    cache = Z
### END ###
return A, cache
# Backward pass
def tanh_backward(dA, cache):
### START ###
    # Fill in the code for Question 2 here
    Z = cache
    dZ = dA * (1 - np.tanh(Z) ** 2)
### END ###
return dZ
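# A minimal finite-difference check for the activation backward passes (an
# illustrative sketch; the step size and tolerance are arbitrary assumptions).
# For each element, f'(Z) should match (f(Z + h) - f(Z - h)) / (2h).
def _check_activation_gradients(h=1e-5):
    Z = np.linspace(-2.0, 2.0, 9).reshape(3, 3)
    for fwd, bwd in [(sigmoid, sigmoid_backward), (tanh, tanh_backward)]:
        A_plus, _ = fwd(Z + h)
        A_minus, _ = fwd(Z - h)
        numeric = (A_plus - A_minus) / (2 * h)
        analytic = bwd(np.ones_like(Z), Z)  # passing dA = 1 isolates f'(Z)
        assert np.allclose(numeric, analytic, atol=1e-6), f"{fwd.__name__} gradient mismatch"

if __name__ == "__main__":
    _check_softmax()
    _check_activation_gradients()
    print("softmax, sigmoid, and tanh pass their sanity checks.")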
forward.py
"""
Forward propagation.
"""
import numpy as np
from equations import softmax, relu, sigmoid, tanh
# One linear forward step: Z = W * A_prev + b
def forward(A_prev, W, b):
Z = np.dot(W, A_prev) + b
assert (Z.shape == (W.shape[0], A_prev.shape[1]))
    cache = (A_prev, W, b)  # cache each layer's inputs for backpropagation
return Z, cache
# One linear step followed by an activation: Z = W * A_prev + b, A = g(Z)
def linear_forward(A_prev, W, b, activation, keep_prob=1.0):
    Z, linear_cache = forward(A_prev, W, b)
    A, cache = None, None
    if activation == "softmax":
        A, Z = softmax(Z)  # each activation returns (A, cache) with cache == Z
    elif activation == "relu":
        A, Z = relu(Z)
    elif activation == "sigmoid":
        A, Z = sigmoid(Z)
    elif activation == "tanh":
        A, Z = tanh(Z)
assert (A.shape == (W.shape[0], A_prev.shape[1]))
D = None
if keep_prob == 1.0:
cache = (linear_cache, Z, D)
### START ###
    # elif keep_prob < 1.0:
    # Fill in the code for Question 3 here
    else:
        # Inverted dropout: scale kept units by 1/keep_prob so activations keep
        # the same expected value at train and test time
        D = np.random.rand(A.shape[0], A.shape[1])
        D = (D < keep_prob) / keep_prob
        A *= D
        cache = (linear_cache, Z, A, D)
### END ###
return A, cache
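# A minimal check of the inverted-dropout scaling (an illustrative sketch with
# arbitrary sizes): averaged over many masks, A * D should approach A, which is
# why no rescaling is needed at test time.
def _check_inverted_dropout(keep_prob=0.8, trials=20000):
    A = np.ones((3, 1))
    total = np.zeros_like(A)
    for _ in range(trials):
        D = (np.random.rand(*A.shape) < keep_prob) / keep_prob
        total += A * D
    assert np.allclose(total / trials, A, atol=0.05)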
def L_model_forward(X, parameters, activation, keep_prob=1.0):
A = X
caches = []
L = len(parameters) // 2
for l in range(1, L):
A_prev = A
A, cache = linear_forward(A_prev, parameters["W" + str(l)], parameters["b" + str(l)],
activation=activation, keep_prob=keep_prob)
caches.append(cache)
    AL, cache = linear_forward(A, parameters["W" + str(L)], parameters["b" + str(L)],
                               activation='softmax', keep_prob=1.0)  # no dropout on the output layer
caches.append(cache)
assert (AL.shape == (10, X.shape[1]))
return AL, caches
initialize_parameters.py
"""
Parameter initialization.
"""
import numpy as np
# Initialize weights W and biases b
def initialize_parameters(layers):
"""
    layers: layer dimensions of the network
    parameters: dictionary of weights and biases
"""
    parameters = {}  # record each layer's parameters in a dictionary
L = len(layers)
for i in range(1, L):
parameters['W' + str(i)] = np.random.randn(layers[i], layers[i - 1]) * 0.01
parameters['b' + str(i)] = np.zeros((layers[i], 1))
assert (parameters['W' + str(i)].shape == (layers[i], layers[i - 1]))
assert (parameters['b' + str(i)].shape == (layers[i], 1))
return parameters
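# With ReLU hidden layers, scaling by 0.01 can make activations shrink as depth
# grows; He initialization (scaling by sqrt(2 / fan_in)) is a common alternative.
# A minimal sketch, kept separate so the default above stays unchanged:
def initialize_parameters_he(layers):
    parameters = {}
    for i in range(1, len(layers)):
        parameters['W' + str(i)] = np.random.randn(layers[i], layers[i - 1]) * np.sqrt(2.0 / layers[i - 1])
        parameters['b' + str(i)] = np.zeros((layers[i], 1))
    return parameters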
initialize_user.py
"""
User-selected initialization variables.
"""
# Check whether a value is a positive integer
def is_positive_integer(value):
try:
n = int(value)
return n > 0
except ValueError:
return False
# Define the number of neurons in each layer of the network
def generate_layer_dims():
while True:
        hidden_layers = input("Enter the number of hidden layers: ")
if is_positive_integer(hidden_layers):
m = int(hidden_layers)
break
        else:
            print("Input must be an integer greater than 0; please try again.")
    layer_dims = [784]  # first element is the input-layer size
for i in range(1, m + 1):
while True:
            neurons = input(f"Enter the number of neurons in layer {i}: ")
if is_positive_integer(neurons):
layer_dims.append(int(neurons))
break
            else:
                print(f"The neuron count for layer {i} must be an integer greater than 0; please try again.")
    layer_dims.append(10)  # last element is the output-layer size
return layer_dims
# Validate user input
def get_user_input(prompt, validator):
"""
    prompt: string shown to the user.
    validator: function that checks whether the input is acceptable.
"""
while True:
        input_data = input(prompt)  # read the user's input
        if validator(input_data):  # return the value once it passes validation
            return input_data
        else:
            print("Invalid input; please try again.")
# Collect the user's training configuration
def user_input():
"""
    iterations: number of training iterations
    learning_rate: learning rate
    cost_function: cost function
    activation: activation function
    technique: optimization method
    dropout: whether to enable dropout regularization
    keep_prob: probability of keeping a neuron
"""
    iterations = int(get_user_input("Enter the number of iterations: ", lambda x: x.isdigit() and int(x) > 0))
    learning_rate = float(get_user_input("Enter the learning rate: ", lambda x: x.replace('.', '', 1).isdigit() and 0 <= float(x) <= 1))
    cost_function = get_user_input("Choose a cost function ('CE' or 'MSE'): ", lambda x: x in ['CE', 'MSE'])
    activation = get_user_input("Choose the hidden-layer activation ('relu', 'sigmoid', or 'tanh'): ", lambda x: x in ['relu', 'sigmoid', 'tanh'])
    technique = get_user_input("Choose an optimization method ('adam', 'gd', or 'momentum'): ", lambda x: x in ['adam', 'gd', 'momentum'])
    dropout = get_user_input("Enable dropout regularization? ('true' or 'false'): ", lambda x: x in ['true', 'false'])
    keep_prob = 1.0
    if dropout == 'true':
        keep_prob = float(get_user_input("Enter the keep probability (0~1): ", lambda x: x.replace('.', '', 1).isdigit() and 0 <= float(x) <= 1))
return iterations, learning_rate, cost_function, activation, technique, keep_prob
main.py
"""
Main program.
"""
import numpy as np  # math library providing array and matrix support
import pickle  # serialization, used to save computed values, model weights, datasets, etc.
import time  # measure training time
from matplotlib import pyplot as plt  # plot cost and accuracy curves
from prep_data import test_data, train_data, m_train, m_test, labels_train, labels_test
from new_image import image
from model import VanillaNN  # feed-forward neural network model
from initialize_user import generate_layer_dims, user_input
from utils import vector_to_digit, test_accuracy
# Meaning of each control command
command_message = "\nList of control commands:" \
                  "\ne: terminate the program" \
                  "\nnew: test the loaded digit3.png image sample" \
                  "\ntrain: train the neural network model" \
                  "\nc: plot the training and validation cost curves" \
                  "\nacc: print training and test accuracy" \
                  "\ntest: classify a randomly chosen image from the test set" \
                  "\ncommands: print this command list\n"
# Print the command list
print(command_message)
# ======================================================================================================================
# Main entry point
if __name__ == "__main__":
parameters, train_costs = None, None
    # User-supplied initial settings
layers = generate_layer_dims()
    epochs, alpha, cost_func, act_func, opt_tech, drop_rate = user_input()  # epochs = number of training iterations
    # Initialize the neural network model
model = VanillaNN(layer_dims=layers, iterations=epochs, learning_rate=alpha, mini_batch_size=1024,
cost_function=cost_func, activation=act_func, print_cost=True)
    user = input("Enter a control command: ")
    # 'e' terminates the program
while user != 'e':
        # Train the model with the chosen optimization method
if user == 'train':
            start = time.time()  # record the start time
            parameters, train_costs, cv_costs = model.train(X=train_data, technique=opt_tech, keep_prob=drop_rate)  # train on the training set
            end = time.time()  # record the end time
            print(f"\nTime taken for {epochs} epochs: {end - start} seconds \n")  # report training time
            with open("../填空/dict.pickle", "wb") as pickle_out, \
                 open("../填空/costs_place.pickle", "wb") as pickle_cost, \
                 open("../填空/cv_costs.pickle", "wb") as pickle_cvcosts:  # cv = cross-validation
                pickle.dump(cv_costs, pickle_cvcosts)
                pickle.dump(train_costs, pickle_cost)
                pickle.dump(parameters, pickle_out)
elif user == "new":
            with open("../填空/dict.pickle", "rb") as pickle_in:
                parameters = pickle.load(pickle_in)
X_test = image
output = model.test(parameters, X_test)
print(f"\nOutput probabilities are: \t\n{output}\n")
digit = np.where(output == np.amax(output))[0][0]
fig = np.asarray(X_test).reshape(28, 28)
plt.title(f"The test example digit is: {digit}")
plt.imshow(fig)
plt.show()
plt.close()
elif user == 'test':
            with open("../填空/dict.pickle", "rb") as pickle_in:
                parameters = pickle.load(pickle_in)
            # Create a figure with a 1x5 grid of subplots
fig, axs = plt.subplots(1, 5, figsize=(15, 3))
            # Test five images in a loop
for i in range(5):
                X_test = test_data[:, np.random.randint(m_test)].reshape(784, 1)  # pick a random image
                output = model.test(parameters, X_test)  # get the model's output
                digit = np.where(output == np.amax(output))[0][0]  # predicted digit
                img = np.asarray(X_test).reshape(28, 28)  # reshape the image data to 28x28
                axs[i].imshow(img, cmap='gray')  # show the image in the subplot
                axs[i].set_title(f"Digit: {digit}")  # subplot title
                axs[i].axis('off')  # hide the axes
plt.show()
plt.close()
        elif user == 'c':  # plot the cost curves
            with open("../填空/costs_place.pickle", "rb") as pickle_inc, \
                 open("../填空/cv_costs.pickle", "rb") as pickle_cv:
                train_costs = pickle.load(pickle_inc)
                cv_costs = pickle.load(pickle_cv)
width_in_inches = 12
height_in_inches = 30
dots_per_inch = 100
plt.figure(figsize=(width_in_inches, height_in_inches), dpi=dots_per_inch)
            plt.plot(train_costs, '^-r', label="train", mew=1, linewidth=1)
            plt.plot(cv_costs, '^-b', label="validation", mew=1, linewidth=1)
plt.title("Cost (train and validation) as the model trains", fontsize=15, color='black')
plt.xlabel('Epoch', fontsize=15, color='black')
plt.ylabel("Cost", fontsize=15, color='black')
plt.legend(loc="upper right", fontsize=15)
plt.xticks(range(0, len(train_costs) + 1), fontsize=10, color='black')
plt.yticks(fontsize=10, color='black')
plt.subplots_adjust(left=0.1, right=0.95, bottom=0.2, top=0.85, wspace=1, hspace=0.4)
plt.show()
plt.close()
        elif user == 'acc':  # compute accuracy on the training and test sets
            with open("../填空/dict.pickle", "rb") as pickle_in:
                parameters = pickle.load(pickle_in)
            temp_train = model.test(parameters, train_data)  # shape (10, 30000)
            train_predictions = vector_to_digit(temp_train, size=m_train)  # shape (1, 30000)
            temp_test = model.test(parameters, test_data)  # shape (10, 7000)
            test_predictions = vector_to_digit(temp_test, size=m_test)  # shape (1, 7000)
train_accuracy = test_accuracy(train_predictions, ground_truth=labels_train, size=m_train)
tests_accuracy = test_accuracy(test_predictions, ground_truth=labels_test, size=m_test)
print(f"\nAccuracy on training set is: {train_accuracy}%")
print(f"Accuracy on testing set is: {tests_accuracy}%\n")
        elif user == 'commands':  # print the command list again
print(command_message)
        user = input("Enter a control command: ")
    print("\nSee you later, user.")
model.py
"""
Neural network model.
"""
from matplotlib import pyplot as plt
from prep_data import train_data, test_data, cv_data, y, y_cv, labels_train, m_train, m_cv
from initialize_parameters import initialize_parameters
from compute_cost import compute_cost_CE, compute_cost_MSE
from update_parameters import (update_adam_parameters, initialize_adam, update_gd_parameters, initialize_velocity,
update_momentum_parameters)
from forward import L_model_forward
from backward import L_model_backward
from utils import make_batches
plt.rcParams['figure.figsize'] = (10.0, 7.0)  # default figure size for matplotlib plots
# Neural network model (class)
class VanillaNN:
    # Initialize instance variables
def __init__(self, parameters=None, layer_dims=None, iterations=25, learning_rate=None, mini_batch_size=512,
cost_function='CE', activation='relu', print_cost=False):
        if layer_dims is None:
            self.layer_dims = [784, 30, 30, 10]  # layer dimensions of the network
        else:
            self.layer_dims = layer_dims
        self.parameters = parameters  # parameters W and b (dictionary)
        self.iterations = iterations  # number of iterations
        self.learning_rate = learning_rate  # learning rate
        self.mini_batch_size = mini_batch_size  # mini-batch size
        self.cost_function = cost_function  # cost function
        self.activation = activation  # activation function
        self.print_cost = print_cost  # whether to print the cost
        self.costs = []  # training-set cost for each iteration
        self.cv_costs = []  # validation-set cost for each iteration
        self.v, self.s = None, None  # v: first moment, s: second moment (Adam)
        self.v2 = None  # v2: velocity (momentum)
    # Train the model
def train(self, X=train_data, Y=y, layer_dims=None, learning_rate=None, technique='adam', keep_prob=1.0):
"""
        X: training samples
        Y: ground-truth labels for the training samples
        layer_dims: layer dimensions of the network
        learning_rate: learning rate
        technique: optimization method
        keep_prob: probability of keeping a neuron (dropout)
        costs, cv_costs: training-set and validation-set cost histories
"""
        # Initialization
        if layer_dims is not None:
            self.layer_dims = layer_dims  # layer count and sizes
        self.parameters = initialize_parameters(self.layer_dims)  # initialize parameters
        self.v, self.s = initialize_adam(self.parameters)  # initialize Adam's first and second moments
        self.v2 = initialize_velocity(self.parameters)  # initialize the momentum velocities
        t = 0  # step counter for Adam and momentum bias correction
        # Main training loop
for i in range(1, self.iterations + 1):
            batches = make_batches(X, Y, batch_size=self.mini_batch_size)  # split the training set into mini-batches
            cost, cv_cost = 0, 0
            curr_cost, curr_cv_cost = 0, 0  # running cost totals for this iteration
            # Train on each mini-batch
for batch in batches:
curr_X, curr_Y = batch
                # Forward propagation
                AL, caches = L_model_forward(curr_X, self.parameters, self.activation, keep_prob=keep_prob)  # training forward pass
                cv_AL, _ = L_model_forward(cv_data, self.parameters, self.activation)  # validation forward pass (no dropout at evaluation)
                # Compute the cost with the chosen cost function
if self.cost_function == 'CE':
                    cost = compute_cost_CE(AL, curr_Y)  # training cost
                    cv_cost = compute_cost_CE(cv_AL, y_cv)  # validation cost
elif self.cost_function == 'MSE':
cost = compute_cost_MSE(AL, curr_Y)
cv_cost = compute_cost_MSE(cv_AL, y_cv)
                curr_cost += cost  # accumulate the training cost over mini-batches
                curr_cv_cost += cv_cost  # accumulate the validation cost
                # Backward propagation
                gradients = L_model_backward(AL, caches, Y_param=curr_Y, cost_function=self.cost_function,
                                             activation=self.activation, keep_prob=keep_prob)  # gradients from the training batch
                # Update parameters with the chosen optimization method
if technique == 'adam':
if learning_rate is not None:
self.learning_rate = learning_rate
t += 1
self.parameters, self.v, self.s = update_adam_parameters(self.parameters, gradients, self.v,
self.s, t, self.learning_rate)
elif technique == 'gd':
if learning_rate is not None:
self.learning_rate = learning_rate
self.parameters = update_gd_parameters(self.parameters, gradients, self.learning_rate)
elif technique == 'momentum':
if learning_rate is not None:
self.learning_rate = learning_rate
t += 1
self.parameters, self.v2 = update_momentum_parameters(self.parameters, gradients, self.v2, t,
self.learning_rate)
            t = 0  # reset the step counter for the next iteration
            curr_cost = curr_cost / m_train  # average training cost per sample
            curr_cv_cost = curr_cv_cost / m_cv / len(batches)  # average validation cost per sample
self.costs.append(curr_cost)
self.cv_costs.append(curr_cv_cost)
            # Print the costs for each iteration
            if self.print_cost:
                print(f"Train cost for iteration # {i}: {curr_cost}")
                print(f"Validation cost for iteration # {i}: {curr_cv_cost}")
        return self.parameters, self.costs, self.cv_costs  # final parameters plus training and validation cost histories
    # Run inference with trained parameters (used on the test set after training)
def test(self, parameters=None, X_test=test_data):
"""
        parameters: weights W and biases b, normally those produced by train()
        X_test: test samples
        AL: predictions
"""
if parameters is None:
parameters = self.parameters
AL, _ = L_model_forward(X_test, parameters, self.activation)
return AL
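# A minimal smoke test (an illustrative sketch: it assumes prep_data can load
# ../填空/train.csv and trains only one iteration so it finishes quickly):
if __name__ == "__main__":
    demo_model = VanillaNN(layer_dims=[784, 30, 10], iterations=1, learning_rate=0.001,
                           mini_batch_size=1024, print_cost=True)
    demo_parameters, demo_costs, demo_cv_costs = demo_model.train(technique='adam')
    demo_predictions = demo_model.test(demo_parameters, test_data)
    print("Prediction matrix shape:", demo_predictions.shape)  # expected (10, m_test)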
new_image.py
"""
Convert an image into a (784, 1) NumPy array.
"""
import numpy as np
from PIL import Image, ImageFilter
def image_prepare(argv):
    # Open the image file with PIL and convert it to grayscale ('L' mode).
im = Image.open(argv).convert('L')
    # Get the image width and height:
width = float(im.size[0])
height = float(im.size[1])
    # Create a new white 28x28 canvas:
newImage = Image.new('L', (28, 28), 255)
    # Resize the image and paste it onto the new canvas:
if width > height:
nheight = int(round((20.0 / width * height), 0))
if nheight == 0:
nheight = 1
        img = im.resize((20, nheight), Image.LANCZOS).filter(ImageFilter.SHARPEN)
wtop = int(round(((28 - nheight) / 2), 0))
newImage.paste(img, (4, wtop))
else:
nwidth = int(round((20.0 / height * width), 0))
if nwidth == 0:
nwidth = 1
img = im.resize((nwidth, 20), Image.LANCZOS).filter(ImageFilter.SHARPEN)
wleft = int(round(((28 - nwidth) / 2), 0))
newImage.paste(img, (wleft, 4))
    # Get the pixel values:
tv = list(newImage.getdata())
    # Normalize the pixel values (inverting so ink is high and background is low):
tva = [(255 - x) * 1.0 / 255.0 for x in tv]
    # Return the processed image:
return tva
# Process the image and convert it to a NumPy array:
image = np.asarray(image_prepare("digit3.png")).reshape(784, 1)
prep_data.py
"""
Data preparation: build the training, validation, and test sets.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
data = pd.read_csv("../填空/train.csv")  # 42000 samples
# Y (ground-truth labels)
labels = np.asarray(data['label']) # (42000,) 一维数组
# Use the 'label' column as the DataFrame index so rows can be looked up by label.
data.set_index('label', inplace=True)
# Convert the DataFrame to a NumPy array; each row holds the pixel values of one digit image.
data_original = np.asarray(data) # (42000, 784)
# Transpose so each column is one sample
data_original = np.transpose(data_original) # (784, 42000)
# Training set (30000 samples)
# Take the first 30000 samples as training data.
train_data = data_original[:, : 30000] # (784, 30000)
# The matching first 30000 labels.
labels_train = labels[: 30000] # (30000,) 一维数组
# Validation set (5000 samples)
# Samples 30000 through 34999 form the cross-validation set.
cv_data = data_original[:, 30000:35000] # (784, 5000)
# The matching validation labels.
labels_cv = labels[30000:35000] # (5000,) 一维数组
# Test set (7000 samples)
# The remaining samples from index 35000 onward; test_data is a 784x7000 array of pixel values.
test_data = data_original[:, 35000:] # shape(784, 7000)
# The matching test labels.
labels_test = labels[35000:] # (7000,) 一维数组
# Sample counts for each split.
m_train = len(labels_train)  # 30000 training samples
m_cv = len(labels_cv)  # 5000 validation samples
m_test = len(labels_test)  # 7000 test samples
# Training labels
# One-hot encode: turn the (30000,) labels into a (10, 30000) array with a 1 in the row matching each label.
y = []
for i in range(m_train):
temp = np.zeros((10,), dtype=int)
temp[labels_train[i]] = 1
y.append(temp)
# Convert y to a NumPy array
y = np.asarray(y)
y = np.transpose(y) # (10, 30000)
# Validation labels
# One-hot encode: turn the (5000,) labels into a (10, 5000) array.
y_cv = []
for i in range(m_cv):
temp = np.zeros((10,), dtype=int)
temp[labels_cv[i]] = 1
y_cv.append(temp)
# Convert y_cv to a NumPy array
y_cv = np.asarray(y_cv) # (5000, 10)
y_cv = np.transpose(y_cv) # (10, 5000)
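# The two loops above can be replaced by a vectorized one-hot encoding built from
# an identity-matrix lookup; a quick equivalence check (an illustrative sketch):
assert np.array_equal(y, np.eye(10, dtype=int)[labels_train].T)
assert np.array_equal(y_cv, np.eye(10, dtype=int)[labels_cv].T)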
# ======================================================================================================================
if __name__ == "__main__":
    # Visualization
    # Randomly choose a few image samples
num_samples_to_visualize = 5
sample_indices = np.random.randint(0, m_train, num_samples_to_visualize)
    # Show the image samples
fig, axes = plt.subplots(1, num_samples_to_visualize, figsize=(10, 2))
    for i, idx in enumerate(sample_indices):
axes[i].imshow(train_data[:, idx].reshape(28, 28), cmap='gray')
axes[i].axis('off')
plt.show()
    # Normalization
    # Scale pixel values into [0, 1]
# train_data_normalized = train_data / 255.0
# cv_data_normalized = cv_data / 255.0
# test_data_normalized = test_data / 255.0
update_parameters.py
"""
Parameter update rules (optimizers).
"""
import numpy as np
# Gradient descent update
def update_gd_parameters(parameters, gradients, alpha=0.01):
"""
    parameters: weights and biases
    gradients: list of (dW, db) tuples, ordered from layer 1 to layer L
    alpha: learning rate
"""
L = len(parameters) // 2
for l in range(1, L + 1):
parameters['W' + str(l)] -= alpha * gradients[l - 1][0]
parameters['b' + str(l)] -= alpha * gradients[l - 1][1]
    return parameters  # return the updated parameters
# ======================================================================================================================
# Momentum update
def initialize_velocity(parameters):
"""
    parameters: weights and biases
"""
L = len(parameters) // 2
v = {}
for l in range(L):
v["dW" + str(l + 1)] = np.zeros(parameters['W' + str(l + 1)].shape)
v["db" + str(l + 1)] = np.zeros(parameters['b' + str(l + 1)].shape)
return v
def update_momentum_parameters(parameters, gradients, v, t, alpha=0.01):
beta = 0.9
L = len(parameters) // 2 # number of layers in the neural networks
# Momentum update for each parameter
for l in range(L):
v["dW" + str(l + 1)] = beta * v["dW" + str(l + 1)] + (1 - beta) * gradients[l][0]
v["db" + str(l + 1)] = beta * v["db" + str(l + 1)] + (1 - beta) * gradients[l][1]
v_corrected = {"dW" + str(l + 1): v["dW" + str(l + 1)] / (1 - beta ** t),
"db" + str(l + 1): v["db" + str(l + 1)] / (1 - beta ** t)}
# update parameters
parameters["W" + str(l + 1)] = parameters['W' + str(l + 1)] - alpha * v_corrected["dW" + str(l + 1)]
parameters["b" + str(l + 1)] = parameters['b' + str(l + 1)] - alpha * v_corrected["db" + str(l + 1)]
return parameters, v
# ======================================================================================================================
# Adam update
def initialize_adam(parameters):
v, s = None, None
### START ###
    # Fill in the code for Question 1 here
    v = {}
    s = {}
    for k in parameters:
        v[k] = np.zeros_like(parameters[k])
        s[k] = np.zeros_like(parameters[k])
### END ###
return v, s
def update_adam_parameters(parameters, gradients, v, s, t, alpha=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
    ### START ###
    # Fill in the code for Question 1 here
    L = len(parameters) // 2  # number of layers in the neural network
    for l in range(L):
        # gradients is a list of (dW, db) tuples, ordered from layer 1 to layer L
        dW, db = gradients[l]
        # Moving averages of the first and second moments for W, with bias correction
        v["W" + str(l + 1)] = beta1 * v["W" + str(l + 1)] + (1 - beta1) * dW
        s["W" + str(l + 1)] = beta2 * s["W" + str(l + 1)] + (1 - beta2) * np.square(dW)
        v_corrected = v["W" + str(l + 1)] / (1 - beta1 ** t)
        s_corrected = s["W" + str(l + 1)] / (1 - beta2 ** t)
        parameters["W" + str(l + 1)] -= alpha * v_corrected / (np.sqrt(s_corrected) + epsilon)
        # Moving averages of the first and second moments for b, with bias correction
        v["b" + str(l + 1)] = beta1 * v["b" + str(l + 1)] + (1 - beta1) * db
        s["b" + str(l + 1)] = beta2 * s["b" + str(l + 1)] + (1 - beta2) * np.square(db)
        v_corrected = v["b" + str(l + 1)] / (1 - beta1 ** t)
        s_corrected = s["b" + str(l + 1)] / (1 - beta2 ** t)
        parameters["b" + str(l + 1)] -= alpha * v_corrected / (np.sqrt(s_corrected) + epsilon)
    ### END ###
return parameters, v, s
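# A minimal convergence check for the Adam update on a toy problem (an
# illustrative sketch with a single 1x1 "layer"): minimizing f(w) = w^2 should
# drive W1 toward 0.
if __name__ == "__main__":
    params = {"W1": np.array([[5.0]]), "b1": np.array([[0.0]])}
    v_demo, s_demo = initialize_adam(params)
    for step in range(1, 2001):
        grads = [(2 * params["W1"], 2 * params["b1"])]  # gradients of w^2 and b^2
        params, v_demo, s_demo = update_adam_parameters(params, grads, v_demo, s_demo, step, alpha=0.01)
    print("W1 after 2000 Adam steps:", params["W1"])  # expected to be close to 0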
utils.py
import numpy as np
from prep_data import labels_train, m_train
# Convert probability vectors into digit labels, a (1, size) array
def vector_to_digit(initial_predictions, size=None):
if size is None:
size = m_train # 30000
pred_updated = np.zeros((1, size)) # (1, 30000)
for t in range(size):
temp_pred = initial_predictions[:, t]
pred_updated[:, t] = np.where(temp_pred == np.amax(temp_pred))[0]
    return pred_updated  # (1, size) array of predicted digits
# Compute accuracy
def test_accuracy(predictions, ground_truth=None, size=None):
"""
    predictions: predicted labels
    ground_truth: true labels
    size: number of samples
    accuracy: accuracy as a percentage, rounded to two decimal places
"""
if ground_truth is None:
        ground_truth = labels_train  # labels_train: shape (30000,)
if size is None:
        size = m_train  # m_train = 30000
accuracy = round(np.sum(predictions == ground_truth) * 100 / size, 2)
return accuracy
# Create mini-batches for neural network training
def make_batches(X_data, y_data, batch_size=512):
"""
    X_data: training samples
    y_data: ground-truth labels for the training samples
    batch_size: mini-batch size
    batches: list of (X, y) mini-batches
"""
    total = X_data.shape[1]  # number of samples
    permutation = np.random.permutation(total)  # one shared permutation shuffles X and y together
    shuffled_x = X_data[:, permutation]  # shuffled X values
    shuffled_y = y_data[:, permutation].reshape(10, total)  # y values matching the shuffled X
    whole_batches = total // batch_size  # number of full batches
    batches = []  # collect the (X, y) pair for each batch
    for i in range(whole_batches):
        curr_x = shuffled_x[:, i * batch_size: (i + 1) * batch_size]  # X slice for this batch
        curr_y = shuffled_y[:, i * batch_size: (i + 1) * batch_size]  # matching y slice
        batch = (curr_x, curr_y)
        batches.append(batch)
    # If the batch size does not divide the sample count, put the remainder in a final batch
if total % batch_size != 0:
curr_x = shuffled_x[:, whole_batches * batch_size:]
curr_y = shuffled_y[:, whole_batches * batch_size:]
batch = (curr_x, curr_y)
batches.append(batch)
return batches
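# A quick shape check for make_batches (an illustrative sketch with random data;
# running this file directly assumes ../填空/train.csv exists, since prep_data
# loads it at import time):
if __name__ == "__main__":
    X_demo = np.random.rand(784, 1000)
    y_demo = np.random.rand(10, 1000)
    demo_batches = make_batches(X_demo, y_demo, batch_size=512)
    print(len(demo_batches))  # 2 batches: one of 512 samples plus a remainder of 488
    print(demo_batches[0][0].shape, demo_batches[-1][1].shape)  # (784, 512) (10, 488)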
You'll have to find test.csv and train.csv yourselves~ I couldn't upload them, no idea why..