TensorFlow Summary Notes

This post walks through model training with TensorFlow: creating a Saver object and saving/restoring models; basic operations such as tf.slice and tf.cast; variable definition, graph construction, and activation functions such as tf.nn.relu; plus a single-layer neural network example and how to use dropout and Batch Normalization to improve a model.


Memory not released: kill the offending process by its PID.
kill -9 <PID>

saver

Saver keeps only the most recent max_to_keep checkpoints; which metric (if any) decides what gets saved is up to your training code.

#### Create the saver object
saver = tf.train.Saver(var_list=tf.trainable_variables(), max_to_keep=args.max_checkpoints)

#### Save the model
# the step value is appended to the checkpoint filename (e.g. model.ckpt-1000)
saver.save(sess, checkpoint_path, global_step=step)

#### Restore the model
ckpt = tf.train.get_checkpoint_state(logdir)  # reads the checkpoint state file in logdir
ckpt.model_checkpoint_path  # path of the most recently saved model
saver.restore(sess, ckpt.model_checkpoint_path)
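
A minimal end-to-end sketch of the save/restore cycle (the path and the variable here are illustrative, not from the snippets above):

import tensorflow as tf

v = tf.get_variable("v", shape=[1], initializer=tf.zeros_initializer())
saver = tf.train.Saver(max_to_keep=3)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.save(sess, "/tmp/ckpt/model.ckpt", global_step=100)  # writes model.ckpt-100

with tf.Session() as sess:
    ckpt = tf.train.get_checkpoint_state("/tmp/ckpt")
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)  # no initializer needed after restore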

tf.slice(input_, begin, size, name=None)
Extracts a block from the input: starting at begin, take size elements along each dimension.
Equivalent to plain indexing like input_[begin[0]:begin[0]+size[0], begin[1]:begin[1]+size[1], ...]
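
A small example to make the begin/size semantics concrete:

t = tf.constant([[1, 2, 3],
                 [4, 5, 6]])
s = tf.slice(t, begin=[0, 1], size=[2, 2])  # rows 0..1, cols 1..2

with tf.Session() as sess:
    print(sess.run(s))  # [[2 3]
                        #  [5 6]]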

tf.cast:
type conversion

## cast global_step + 1 to float32
step = tf.cast(global_step + 1, dtype=tf.float32)

tf.variable_scope:

## create variable v inside the "zyy" variable scope
with tf.variable_scope("zyy"):  # the "zyy" scope
    v = tf.get_variable("v", [1], initializer=tf.constant_initializer(1.0))

To fetch the variable v again, set reuse=True:
with tf.variable_scope("zyy", reuse=True):
    v1 = tf.get_variable("v", [1])
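
A quick (illustrative) check that both handles refer to the same underlying variable:

print(v is v1)  # True: get_variable with reuse=True returns the existing variable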

Variable definitions:

# generate 100 random numbers, dtype float32
x_data = np.random.rand(100).astype(np.float32)
y_data = x_data*0.1 + 0.3

x_data = np.linspace(-1,1,300)[:, np.newaxis]
noise = np.random.normal(0, 0.05, x_data.shape)
y_data = np.square(x_data) - 0.5 + noise

Graph construction:
The typical TF skeleton:

Weights = tf.Variable(tf.random_uniform([1], -1.0, 1.0))
biases = tf.Variable(tf.zeros([1]))

y = Weights*x_data + biases

loss = tf.reduce_mean(tf.square(y-y_data))
optimizer = tf.train.GradientDescentOptimizer(0.5)
train = optimizer.minimize(loss)

init = tf.global_variables_initializer()
# defining variables in TF
Weights = tf.Variable(tf.random_uniform([1], -1.0, 1.0))
biases = tf.Variable(tf.zeros([1]))
Weights = tf.Variable(tf.random_normal([in_size, out_size]))
biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
W = tf.Variable([[1,2,3],[3,4,5]], dtype=tf.float32, name='weights')  # initial value from a constant, with dtype and name
state = tf.Variable(0, name='counter')  # a named variable
# defining constants in TF
matrix1 = tf.constant([[3, 3]])
matrix2 = tf.constant([[2],
                       [2]])
one = tf.constant(1)

TF operations

product = tf.matmul(matrix1, matrix2)  # matrix product
Wx_plus_b = tf.matmul(inputs, Weights) + biases  # again matrix multiplication
new_value = tf.add(state, one)  # add two values
update = tf.assign(state, new_value)  # assign new_value to state
output = tf.multiply(input1, input2)  # multiply two values

TF activation functions

tf.nn.relu
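
For example:

x = tf.constant([-1.0, 0.0, 2.0])
y = tf.nn.relu(x)  # negatives clipped to zero -> [0., 0., 2.]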

placeholder ops (placeholders; the session feeds in the input values)

input1 = tf.placeholder(tf.float32)
input2 = tf.placeholder(tf.float32)
# a placeholder can also be given a shape
xs = tf.placeholder(tf.float32, [None, 1])

with tf.Session() as sess:
    print(sess.run(output, feed_dict={input1: [7.], input2: [2.]}))
    print(sess.run(loss, feed_dict={xs: x_data, ys: y_data}))
    # whether inspecting an intermediate tensor or training, always pass feed_dict

TF optimizers

train = tf.train.GradientDescentOptimizer(0.5).minimize(loss) # SGD
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss) # Adam

A helper function for building a single NN layer:

def add_layer(inputs, in_size, out_size, activation_function=None):
    Weights = tf.Variable(tf.random_normal([in_size, out_size]))
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    Wx_plus_b = tf.matmul(inputs, Weights) + biases
    if activation_function is None:
        outputs = Wx_plus_b
    else:
        outputs = activation_function(Wx_plus_b)
    return outputs
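
A usage sketch (the layer sizes are illustrative): two stacked layers fitting the quadratic data defined earlier.

xs = tf.placeholder(tf.float32, [None, 1])
ys = tf.placeholder(tf.float32, [None, 1])

hidden = add_layer(xs, 1, 10, activation_function=tf.nn.relu)
prediction = add_layer(hidden, 10, 1, activation_function=None)

loss = tf.reduce_mean(tf.reduce_sum(tf.square(ys - prediction), axis=1))
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)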

A function for computing accuracy on the test set (the desired test-set metric differs from the training loss, so separate graph nodes are built for it):
first run the graph to get an intermediate value (the predictions), then build new nodes on top of that value and run again, i.e. run in stages.

def compute_accuracy(v_xs, v_ys):
    global prediction
    y_pre = sess.run(prediction, feed_dict={xs: v_xs})
    correct_prediction = tf.equal(tf.argmax(y_pre,1), tf.argmax(v_ys,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    result = sess.run(accuracy, feed_dict={xs: v_xs, ys: v_ys})
    return result
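
Note this version creates fresh equal/argmax/reduce_mean nodes on every call, which keeps growing the graph. A sketch that builds the accuracy node once (assuming the same xs, ys, and prediction tensors):

correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(ys, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

def compute_accuracy(v_xs, v_ys):
    return sess.run(accuracy, feed_dict={xs: v_xs, ys: v_ys})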

Computing the cross-entropy

cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction),
                                              reduction_indices=[1]))       # loss
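
This hand-rolled form can produce NaN when prediction contains zeros; the fused built-in op is the usual numerically stable alternative (logits here is my name for the pre-softmax layer output, not from the original code):

cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=ys, logits=logits))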

The single-layer function with the dropout mechanism added

def add_layer(inputs, in_size, out_size, layer_name, activation_function=None):
    # add one more layer and return the output of this layer
    Weights = tf.Variable(tf.random_normal([in_size, out_size]))
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    Wx_plus_b = tf.matmul(inputs, Weights) + biases
    # here to dropout
    Wx_plus_b = tf.nn.dropout(Wx_plus_b, keep_prob)
    if activation_function is None:
        outputs = Wx_plus_b
    else:
        outputs = activation_function(Wx_plus_b)
    tf.summary.histogram(layer_name + '/outputs', outputs)
    return outputs
# during training, set keep_prob to 0.5 (drop half the activations)
sess.run(train_step, feed_dict={xs: X_train, ys: y_train, keep_prob: 0.5})

# when recording/evaluating results, use the full network: keep_prob = 1
train_result = sess.run(merged, feed_dict={xs: X_train, ys: y_train, keep_prob: 1})
test_result = sess.run(merged, feed_dict={xs: X_test, ys: y_test, keep_prob: 1})

BN code; the key point is how the ema updates the statistics (train and test differ).
The main part of the BN code (some of it is still not entirely clear to me).

# Batch Normalize
fc_mean, fc_var = tf.nn.moments(
    Wx_plus_b,
    axes=[0],   # the dimensions you want to normalize over; [0] here means over the batch
                # for images, use [0, 1, 2] for [batch, height, width], but not channel
)
scale = tf.Variable(tf.ones([out_size]))
shift = tf.Variable(tf.zeros([out_size]))
epsilon = 0.001

# apply a moving average to mean and var when training on a batch
ema = tf.train.ExponentialMovingAverage(decay=0.5)
def mean_var_with_update():
    ema_apply_op = ema.apply([fc_mean, fc_var])
    with tf.control_dependencies([ema_apply_op]):
        return tf.identity(fc_mean), tf.identity(fc_var)
mean, var = mean_var_with_update()

Wx_plus_b = tf.nn.batch_normalization(Wx_plus_b, mean, var, shift, scale, epsilon)
# equivalent to these two steps:
# Wx_plus_b = (Wx_plus_b - fc_mean) / tf.sqrt(fc_var + 0.001)
# Wx_plus_b = Wx_plus_b * scale + shift
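
The train/test difference mentioned above: at training time use the batch statistics (and update the ema); at test time use the ema's shadow values. A sketch with a boolean switch (the on_train flag and the tf.cond wiring are my assumed completion, not part of the snippet above):

on_train = tf.placeholder(tf.bool)  # feed True while training, False when testing

mean, var = tf.cond(
    on_train,
    mean_var_with_update,                                  # batch stats + ema update
    lambda: (ema.average(fc_mean), ema.average(fc_var)),   # ema shadow values
)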

Session part:

# typical session structure
sess = tf.Session()

sess.run(init)

for step in range(201):
    sess.run(train)
    if step % 20 == 0:
        print(step, sess.run(Weights))

# fetch several ops/tensors in a single sess.run call
_, c = sess.run([optimizer, cost], feed_dict={X: batch_xs})
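
Putting the pieces together, a self-contained run of the linear fit (y = 0.1x + 0.3) assembled from the fragments above:

import numpy as np
import tensorflow as tf

x_data = np.random.rand(100).astype(np.float32)
y_data = x_data * 0.1 + 0.3

Weights = tf.Variable(tf.random_uniform([1], -1.0, 1.0))
biases = tf.Variable(tf.zeros([1]))
y = Weights * x_data + biases

loss = tf.reduce_mean(tf.square(y - y_data))
train = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(201):
        sess.run(train)
        if step % 20 == 0:
            print(step, sess.run(Weights), sess.run(biases))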