Assignment
Implement part of the functionality of TensorFlow Playground.
The data distribution is circles; for now the network is written with 2 layers, the hidden layer having multiple neurons; the classification boundary is drawn with filled contours.
Code
1. Import a few libraries: numpy, matplotlib (for plotting), and sklearn (for generating the data).
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt
2. Generate the dataset
Using make_circles from sklearn; factor controls how close the two circles are:
-> Scale factor between inner and outer circle.
X, y = datasets.make_circles(n_samples=300, noise=0.2, factor=0.3)
nn_input_dim = 2       # two input features
nn_output_dim = 2      # two classes
num_examples = len(X)
learning_rate = 1e-2
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.show()
A preview of the data looks like this:
3. Forward inference
The pass from input to output. Since no sigmoid was at hand, the hyperbolic tangent is used instead; its derivative is 1 - tanh^2(x).
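A quick numerical sanity check of this derivative with a central finite difference (a small sketch, not part of the original code):
x = np.linspace(-3, 3, 7)
h = 1e-5
numeric = (np.tanh(x + h) - np.tanh(x - h)) / (2 * h)  # finite-difference derivative
analytic = 1 - np.tanh(x) ** 2                         # the closed form used below
print(np.max(np.abs(numeric - analytic)))              # should be on the order of 1e-10 or smaller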
def inference(x1, x2, model):
    # x1, x2: scalars, the two features of one sample; X.shape (2,)
    X = np.hstack(([x1], [x2]))
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # W1.shape (2, hidden_dim), b1.shape (1, hidden_dim) -- each hidden neuron has its own bias
    # W2.shape (hidden_dim, 2), b2.shape (1, 2)
    z1 = X.dot(W1) + b1   # z = wx + b
    a1 = np.tanh(z1)      # activation
    z2 = a1.dot(W2) + b2  # output layer
    # applying an activation here is optional, so none is used
    exp_z2 = np.exp(z2)   # softmax
    probs = exp_z2 / np.sum(exp_z2, axis=1, keepdims=True)
    # probs.shape (1, 2)
    return probs[0][0]    # probability of class 0
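A hypothetical usage sketch (the randomly initialized demo_model below is just for illustration and not part of the original code):
demo_hidden_dim = 4  # arbitrary choice for the sketch
demo_model = {
    'W1': np.random.randn(nn_input_dim, demo_hidden_dim),
    'b1': np.random.randn(1, demo_hidden_dim),
    'W2': np.random.randn(demo_hidden_dim, nn_output_dim),
    'b2': np.random.randn(1, nn_output_dim),
}
print(inference(0.5, -0.2, demo_model))  # predicted probability that the point (0.5, -0.2) is class 0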
4. Loss function
def calculate_loss(model):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    z1 = X.dot(W1) + b1
    # z1.shape (300, h)
    a1 = np.tanh(z1)
    # a1.shape (300, h)
    z2 = a1.dot(W2) + b2
    # z2.shape (300, 2)
    exp_z2 = np.exp(z2)
    probs = exp_z2 / np.sum(exp_z2, axis=1, keepdims=True)
    log_probs = -np.log(probs[range(num_examples), y])  # pick each sample's true-class probability
    loss = np.sum(log_probs)
    return loss / num_examples  # average cross-entropy
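This is the average cross-entropy, -(1/N) * sum_i log p_i[y_i]. The expression probs[range(num_examples), y] uses NumPy integer-array indexing to pick out, for each row, the predicted probability of that sample's true class. A tiny standalone sketch of the indexing trick (the toy numbers are made up for illustration):
p = np.array([[0.9, 0.1],
              [0.3, 0.7],
              [0.6, 0.4]])
labels = np.array([0, 1, 1])
print(p[range(3), labels])  # -> [0.9 0.7 0.4], one true-class probability per row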
5. Build the model
def build_model(hidden_dim, iterations=1000):
    # initialize the parameters
    W1 = np.random.randn(nn_input_dim, hidden_dim)    # (2, h)
    b1 = np.random.randn(1, hidden_dim)               # (1, h)
    W2 = np.random.randn(hidden_dim, nn_output_dim)   # (h, 2)
    b2 = np.random.randn(1, nn_output_dim)            # (1, 2)
    # store the parameters in a dict for convenience
    model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
    # Gradient Descent!!! the key part
    for i in range(iterations):
        # forward pass, basically the same as above
        # X input shape (300, 2)
        z1 = X.dot(W1) + b1
        # z1.shape (300, h)
        a1 = np.tanh(z1)
        # a1.shape (300, h)
        z2 = a1.dot(W2) + b2
        # z2.shape (300, 2)
        exp_z2 = np.exp(z2)
        probs = exp_z2 / np.sum(exp_z2, axis=1, keepdims=True)
        # backward pass
        delta3 = probs
        # use the known result: subtract 1 at the true class, keep the other entries as-is
        # when backpropagating (this result already includes the cross-entropy)
        delta3[range(num_examples), y] -= 1  # skips three steps: goes straight from the cross-entropy back to the pre-softmax logits
        # delta3 (300, 2), a1 (300, h)
        dW2 = a1.T.dot(delta3)  # (h, 2)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))  # derivative of tanh
        dW1 = np.dot(X.T, delta2)
        db1 = np.sum(delta2, axis=0)
        W1 -= learning_rate * dW1
        W2 -= learning_rate * dW2
        b1 -= learning_rate * db1
        b2 -= learning_rate * db2
        model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
        ## plotting
        x_line = np.linspace(np.min(X[:, 0]), np.max(X[:, 0]), 100)
        y_line = np.linspace(np.min(X[:, 1]), np.max(X[:, 1]), 100)
        mesh_x, mesh_y = np.meshgrid(x_line, y_line)
        z_prob = np.zeros_like(mesh_x)
        for ix in range(z_prob.shape[0]):
            for iy in range(z_prob.shape[1]):
                z_prob[ix][iy] = inference(mesh_x[ix][iy], mesh_y[ix][iy], model)
        loss = calculate_loss(model)
        axes = plt.gca()
        axes.cla()
        axes.set_title(f'iteration: {i}, loss: {round(loss, 3)}')
        plt.contourf(mesh_x, mesh_y, z_prob)
        axes.scatter(X[:, 0], X[:, 1], c=y)
        plt.pause(0.1)
    return model
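The backward pass above uses the standard shortcut for softmax followed by cross-entropy: the gradient of the loss with respect to the logits z2 is probs with 1 subtracted at the true class. A small numerical check of that identity on a toy example (a sketch, not part of the original; the logits and label are made up):
z = np.array([[1.0, -0.5]])  # toy logits for one sample
label = 0
p = np.exp(z) / np.sum(np.exp(z), axis=1, keepdims=True)
grad = p.copy()
grad[0, label] -= 1          # analytic gradient: probs - one_hot(label)
eps = 1e-6
num_grad = np.zeros_like(z)
for j in range(z.shape[1]):  # finite-difference gradient of -log p[label]
    zp, zm = z.copy(), z.copy()
    zp[0, j] += eps
    zm[0, j] -= eps
    lp = -np.log(np.exp(zp) / np.sum(np.exp(zp), axis=1, keepdims=True))[0, label]
    lm = -np.log(np.exp(zm) / np.sum(np.exp(zm), axis=1, keepdims=True))[0, label]
    num_grad[0, j] = (lp - lm) / (2 * eps)
print(np.max(np.abs(grad - num_grad)))  # should be very close to 0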
6. build_model(hidden_dim=10)
build_model(hidden_dim=10)
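Since the animation relies on plt.pause inside the training loop, when running this as a standalone script a final blocking plt.show() after the call keeps the last decision boundary on screen (a small optional addition, not in the original):
plt.show()  # keep the final figure open after training finishes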
Result
In the end it converges, and the result looks good.