Exponential Decay Learning Rate
# Exponentially decaying learning rate
# learning_rate = LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (global_step / LEARNING_RATE_STEP)
#
#--------------------------------------------------------------------
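# Worked example (using the constants defined just below): with base 0.1, decay 0.99 and step size 1,
# the learning rate after 100 global steps is 0.1 * 0.99 ** 100 ≈ 0.0366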
import tensorflow as tf

LEARNING_RATE_BASE = 0.1   # initial learning rate
LEARNING_RATE_DECAY = 0.99 # learning rate decay rate
LEARNING_RATE_STEP = 1     # number of batches between learning rate updates; usually total_samples / BATCH_SIZE
global_step = tf.Variable(0, trainable=False)  # counts how many batches have been run; not trainable
# Define the exponentially decaying learning rate
# staircase=True:  global_step / LEARNING_RATE_STEP is truncated to an integer, so the learning rate decays in a staircase pattern
# staircase=False: the learning rate follows a smooth decay curve
learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step, LEARNING_RATE_STEP, LEARNING_RATE_DECAY, staircase=True)
# Parameters to be optimized
w1 = tf.Variable(tf.random_normal([2, 1], stddev=1, seed=1))
y = tf.matmul(x, w1)
# x, y_ are the input/label placeholders and COST, PROFIT are per-unit loss weights; all four are assumed to be defined earlier in the full example
loss = tf.reduce_sum(tf.where(tf.greater(y, y_), (y - y_) * COST, (y_ - y) * PROFIT))
# Training step (back-propagation)
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
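To see the schedule without running the full training loop, the minimal sketch below just increments global_step by hand and prints the decayed learning rate (increment_step is a helper introduced here, not part of the original example).
increment_step = tf.assign_add(global_step, 1)  # stands in for "one batch has been trained"
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(5):
        print(sess.run([global_step, learning_rate]))  # 0.1, 0.099, 0.09801, ...
        sess.run(increment_step)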
Moving Average
Keeps a running average of each parameter's past values over a period of time, which improves the model's ability to generalize.
# Instantiate the moving-average class
MOVING_AVERAGE_DECAY = 0.99
ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
# The argument to ema.apply() is the update list; each time sess.run(ema_op) is executed, the moving average of every element in that list is refreshed
# In practice, tf.trainable_variables() is used to automatically collect all trainable parameters into the update list
ema_op = ema.apply(tf.trainable_variables())
with tf.Session() as sess:
    ……
    sess.run(ema_op)
    print(sess.run([w1, ema.average(w1)]))
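The shadow value is updated as shadow = decay * shadow + (1 - decay) * variable, and when a step counter is passed in, decay is capped at min(MOVING_AVERAGE_DECAY, (1 + global_step) / (10 + global_step)). A self-contained sketch (the scalar variable w is an assumption made for illustration):
import tensorflow as tf

w = tf.Variable(0.0)
global_step = tf.Variable(0, trainable=False)
MOVING_AVERAGE_DECAY = 0.99
ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
ema_op = ema.apply([w])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run([w, ema.average(w)]))   # [0.0, 0.0]
    sess.run(tf.assign(w, 1.0))
    sess.run(ema_op)
    # decay = min(0.99, (1 + 0) / (10 + 0)) = 0.1, so the shadow value becomes 0.1 * 0 + 0.9 * 1 = 0.9
    print(sess.run([w, ema.average(w)]))   # [1.0, 0.9]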
Regularization
Adds a weight-dependent penalty for every parameter w to the loss function. This introduces a model-complexity term that suppresses noise and reduces overfitting.
def get_weight(shape, regularizer):
    w = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
    return w

def get_bias(shape):
    b = tf.Variable(tf.constant(0.01, shape=shape))
    return b
x = tf.placeholder(tf.float32, shape=(None, 2))
y_= tf.placeholder(tf.float32, shape=(None, 1))
w1 = get_weight([2, 11], 0.01)
b1 = get_bias([11])
y1 = tf.nn.relu(tf.matmul(x, w1) + b1)
w2 = get_weight([11, 1], 0.01)
b2 = get_bias([1])
y = tf.matmul(y1, w2) + b2 # no activation on the output layer
loss_mse = tf.reduce_mean(tf.square(y - y_))
loss_total = loss_mse + tf.add_n(tf.get_collection('losses'))
# Training step without regularization
#train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_mse)
# Training step with regularization
train_step = tf.train.AdamOptimizer(0.0001).minimize(loss_total)
(Figures: fitted curves with no regularization, with L2 regularization, and with L1 regularization.)
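To produce the L1 variant of the curves above, only the regularizer inside get_weight needs to change; a sketch (the helper name get_weight_l1 is introduced here for illustration):
def get_weight_l1(shape, regularizer):
    w = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    # the L1 penalty is proportional to the sum of |w|, which tends to drive weights to exactly zero (sparser model);
    # the L2 version above penalizes the squared magnitude instead
    tf.add_to_collection('losses', tf.contrib.layers.l1_regularizer(regularizer)(w))
    return w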
General Template
def get_weight(shape, regularizer):
    w = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(regularizer)(w))
    return w

def get_bias(shape):
    b = tf.Variable(tf.constant(0.01, shape=shape))
    return b
REGULARIZER = 0.01
def forward(x, regularizer):
    w1 = get_weight([2, 11], regularizer)
    b1 = get_bias([11])
    y1 = tf.nn.relu(tf.matmul(x, w1) + b1)
    w2 = get_weight([11, 1], regularizer)
    b2 = get_bias([1])
    y = tf.matmul(y1, w2) + b2
    return y
LEARNING_RATE_BASE = 0.001  # initial learning rate
LEARNING_RATE_DECAY = 0.99  # learning rate decay rate
LEARNING_RATE_STEP = 1      # number of batches between learning rate updates; usually total_samples / BATCH_SIZE
def backward():
    x = tf.placeholder(tf.float32, shape=(None, 2))
    y_ = tf.placeholder(tf.float32, shape=(None, 1))
    y = forward(x, REGULARIZER)
    global_step = tf.Variable(0, trainable=False)
    #############################################
    # type 1: mean squared error between y and y_
    loss = tf.reduce_mean(tf.square(y - y_))
    # type 2: cross entropy
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    loss = tf.reduce_mean(ce)
    # type 3: add regularization (error between y and y_ + the penalties collected in 'losses')
    loss_total = loss + tf.add_n(tf.get_collection('losses'))
    # Exponentially decaying learning rate
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,
                                               global_step,
                                               LEARNING_RATE_STEP,
                                               LEARNING_RATE_DECAY,
                                               staircase=True)
    # minimize loss_total; the optimizer increments global_step on every update
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss_total, global_step=global_step)
    # Moving average: the following three lines
    MOVING_AVERAGE_DECAY = 0.99
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    ema_op = ema.apply(tf.trainable_variables())
    with tf.control_dependencies([train_step, ema_op]):
        train_op = tf.no_op(name='train')
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        for i in range(10000):
            # run train_op so that both the gradient update and the moving-average update are applied;
            # X_batch, Y_batch stand for one batch of training data and labels (assumed to be provided by the caller)
            sess.run(train_op, feed_dict={x: X_batch, y_: Y_batch})
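A typical entry point for the template above, assuming it is saved as a standalone script:
if __name__ == '__main__':
    backward()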