Assignment
Implement part of the functionality of TensorFlow Playground.
The data distribution is circles; for now the network is written with 2 layers, the hidden layer having multiple neurons; the classification boundary is drawn with filled contours.
Code
1. Import a few libraries: numpy, matplotlib (for plotting), and sklearn (for generating the data).
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt
2. Generate the dataset
Using make_circles from sklearn; factor controls how close the two circles are:
-> Scale factor between inner and outer circle.
X, y = datasets.make_circles(n_samples=300, noise=0.2, factor=0.3)
nn_input_dim = 2       # two input features
nn_output_dim = 2      # two classes
num_examples = len(X)
learning_rate = 1e-2
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.show()
A preview of the data looks like this:
3. Forward inference
The pass from input to output. Since no sigmoid was at hand, the hyperbolic tangent is used instead; its derivative is 1 - tanh^2(x).
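A quick numerical sanity check of this derivative with a central finite difference (a small sketch, not part of the original code):
x = np.linspace(-3, 3, 7)
h = 1e-5
numeric = (np.tanh(x + h) - np.tanh(x - h)) / (2 * h)  # finite-difference derivative
analytic = 1 - np.tanh(x) ** 2                         # the closed form used below
print(np.max(np.abs(numeric - analytic)))              # should be on the order of 1e-10 or smaller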
def inference(x1, x2, model):
    # x1, x2: scalars, the two features of one sample; X.shape (2,)
    X = np.hstack(([x1], [x2]))
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # W1.shape (2, hidden_dim), b1.shape (1, hidden_dim) -- each hidden neuron has its own bias
    # W2.shape (hidden_dim, 2), b2.shape (1, 2)
    z1 = X.dot(W1) + b1   # z = wx + b
    a1 = np.tanh(z1)      # activation
    z2 = a1.dot(W2) + b2  # output layer
    # applying an activation here is optional, so none is used
    exp_z2 = np.exp(z2)   # softmax
    probs = exp_z2 / np.sum(exp_z2, axis=1, keepdims=True)
    # probs.shape (1, 2)
    return probs[0][0]    # probability of class 0
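A hypothetical usage sketch (the randomly initialized demo_model below is just for illustration and not part of the original code):
demo_hidden_dim = 4  # arbitrary choice for the sketch
demo_model = {
    'W1': np.random.randn(nn_input_dim, demo_hidden_dim),
    'b1': np.random.randn(1, demo_hidden_dim),
    'W2': np.random.randn(demo_hidden_dim, nn_output_dim),
    'b2': np.random.randn(1, nn_output_dim),
}
print(inference(0.5, -0.2, demo_model))  # predicted probability that the point (0.5, -0.2) is class 0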
4. Loss function
def calculate_loss(model):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    z1 = X.dot(W1) + b1
    # z1.shape (300, h)
    a1 = np.tanh(z1)
    # a1.shape (300, h)
    z2 = a1.dot(W2) + b2
    # z2.shape (300, 2)
    exp_z2 = np.exp(z2)
    probs = exp_z2 / np.sum(exp_z2, axis=1, keepdims=True)
    log_probs = -np.log(probs[range(num_examples), y])  # pick each sample's true-class probability
    loss = np.sum(log_probs)
    return loss / num_examples  # average cross-entropy
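This is the average cross-entropy, -(1/N) * sum_i log p_i[y_i]. The expression probs[range(num_examples), y] uses NumPy integer-array indexing to pick out, for each row, the predicted probability of that sample's true class. A tiny standalone sketch of the indexing trick (the toy numbers are made up for illustration):
p = np.array([[0.9, 0.1],
              [0.3, 0.7],
              [0.6, 0.4]])
labels = np.array([0, 1, 1])
print(p[range(3), labels])  # -> [0.9 0.7 0.4], one true-class probability per row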
5. Build the model
def build_model(hidden_dim, iterations=1000):
    # initialize the parameters
    W1 = np.random.randn(nn_input_dim, hidden_dim)    # (2, h)
    b1 = np.random.randn(1, hidden_dim)               # (1, h)
    W2 = np.random.randn(hidden_dim, nn_output_dim)   # (h, 2)
    b2 = np.random.randn(1, nn_output_dim)            # (1, 2)
    # store the parameters in a dict for convenience
    model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
    # Gradient Descent!!! the key part
    for i in range(iterations):
        # forward pass, basically the same as above
        # X input shape (300, 2)
        z1 = X.dot(W1) + b1
        # z1.shape (300, h)
        a1 = np.tanh(z1)
        # a1.shape (300, h)
        z2 = a1.dot(W2) + b2
        # z2.shape (300, 2)
        exp_z2 = np.exp(z2)
        probs = exp_z2 / np.sum(exp_z2, axis=1, keepdims=True)
        # backward pass
        delta3 = probs
        # use the known result: subtract 1 at the true class, keep the other entries as-is
        # when backpropagating (this result already includes the cross-entropy)
        delta3[range(num_examples), y] -= 1  # skips three steps: goes straight from the cross-entropy back to the pre-softmax logits
        # delta3 (300, 2), a1 (300, h)
        dW2 = a1.T.dot(delta3)  # (h, 2)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))  # derivative of tanh
        dW1 = np.dot(X.T, delta2)
        db1 = np.sum(delta2, axis=0)
        W1 -= learning_rate * dW1
        W2 -= learning_rate * dW2
        b1 -= learning_rate * db1
        b2 -= learning_rate * db2
        model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
        ## plotting
        x_line = np.linspace(np.min(X[:, 0]), np.max(X[:, 0]), 100)
        y_line = np.linspace(np.min(X[:, 1]), np.max(X[:, 1]), 100)
        mesh_x, mesh_y = np.meshgrid(x_line, y_line)
        z_prob = np.zeros_like(mesh_x)
        for ix in range(z_prob.shape[0]):
            for iy in range(z_prob.shape[1]):
                z_prob[ix][iy] = inference(mesh_x[ix][iy], mesh_y[ix][iy], model)
        loss = calculate_loss(model)
        axes = plt.gca()
        axes.cla()
        axes.set_title(f'iteration: {i}, loss: {round(loss, 3)}')
        plt.contourf(mesh_x, mesh_y, z_prob)
        axes.scatter(X[:, 0], X[:, 1], c=y)
        plt.pause(0.1)
    return model
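The backward pass above uses the standard shortcut for softmax followed by cross-entropy: the gradient of the loss with respect to the logits z2 is probs with 1 subtracted at the true class. A small numerical check of that identity on a toy example (a sketch, not part of the original; the logits and label are made up):
z = np.array([[1.0, -0.5]])  # toy logits for one sample
label = 0
p = np.exp(z) / np.sum(np.exp(z), axis=1, keepdims=True)
grad = p.copy()
grad[0, label] -= 1          # analytic gradient: probs - one_hot(label)
eps = 1e-6
num_grad = np.zeros_like(z)
for j in range(z.shape[1]):  # finite-difference gradient of -log p[label]
    zp, zm = z.copy(), z.copy()
    zp[0, j] += eps
    zm[0, j] -= eps
    lp = -np.log(np.exp(zp) / np.sum(np.exp(zp), axis=1, keepdims=True))[0, label]
    lm = -np.log(np.exp(zm) / np.sum(np.exp(zm), axis=1, keepdims=True))[0, label]
    num_grad[0, j] = (lp - lm) / (2 * eps)
print(np.max(np.abs(grad - num_grad)))  # should be very close to 0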
6. build_model(hidden_dim=10)
build_model(hidden_dim=10)
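Since the animation relies on plt.pause inside the training loop, when running this as a standalone script a final blocking plt.show() after the call keeps the last decision boundary on screen (a small optional addition, not in the original):
plt.show()  # keep the final figure open after training finishes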
Result
In the end it converges, and the result looks good.