# Bidirectional LSTM on MNIST, treating each 28x28 image as a 28-step sequence.
# This script targets the TensorFlow 1.x API (placeholders, Session, tf.nn.rnn_cell).
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("../MNIST_data", one_hot=True)

# Training parameters
learning_rate = 0.001
training_iters = 100000
batch_size = 128
display_step = 10
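# Note: training_iters counts training examples seen (step * batch_size),
# not epochs; with batch_size=128 this gives roughly 780 optimizer steps.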
# Network parameters
n_input = 28    # MNIST data input (each image row: 28 pixels)
n_steps = 28    # timesteps (one per image row)
n_hidden = 128  # number of features in each LSTM hidden layer
n_class = 10    # MNIST total classes (digits 0-9)
# tf graph input
x = tf.placeholder(tf.float32, [None, n_steps, n_input])
# The LSTM cell state is a (c, h) pair, so a zero initial state carries
# 2*n_hidden values per example; these placeholders are only consumed by
# the static-RNN variant kept for reference inside BiRNN below.
istate_fw = tf.placeholder(tf.float32, [None, 2 * n_hidden])
istate_bw = tf.placeholder(tf.float32, [None, 2 * n_hidden])
y = tf.placeholder(tf.float32, [None, n_class])
# Define weights
weights = {
    # 'hidden' is only used by the static-RNN variant kept below for reference
    'hidden': tf.Variable(tf.random_normal([n_input, 2 * n_hidden])),
    'out': tf.Variable(tf.random_normal([2 * n_hidden, n_class]))
}
biases = {
    'hidden': tf.Variable(tf.random_normal([2 * n_hidden])),
    'out': tf.Variable(tf.random_normal([n_class]))
}
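# Note: the 'out' weights are 2*n_hidden wide because the forward and backward
# LSTM outputs (n_hidden features each) are concatenated before the projection.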
def BiRNN(_X, _istate_fw, _istate_bw, _weights, _biases, _batch_size, _seq_len):
    # All MNIST sequences share the same length; _seq_len is only consumed by
    # the static-RNN variant kept below
    _seq_len = tf.fill([_batch_size], tf.constant(_seq_len, dtype=tf.int64))
    # Alternative using the static API, kept for reference (using the cells
    # defined below). static_bidirectional_rnn expects a length-n_steps list
    # of (batch_size, n_input) tensors rather than a single 3-D tensor:
    # _X = tf.transpose(_X, [1, 0, 2])
    # _X = tf.reshape(_X, [-1, n_input])
    # _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']
    # _X = tf.split(_X, n_steps, 0)
    # outputs, _, _ = tf.nn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, _X,
    #                                                initial_state_fw=_istate_fw,
    #                                                initial_state_bw=_istate_bw,
    #                                                sequence_length=_seq_len)
    # Define forward and backward LSTM cells
    lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # The dynamic API consumes the (batch, steps, input) tensor directly and
    # defaults to zero initial states, so the istate placeholders are unused here
    outputs, output_states = tf.nn.bidirectional_dynamic_rnn(
        lstm_fw_cell, lstm_bw_cell, _X, dtype=tf.float32)
    # Concatenate forward and backward outputs along the feature axis:
    # two (batch, steps, n_hidden) tensors -> (batch, steps, 2*n_hidden)
    outputs = tf.concat(outputs, 2)
    # Transpose to (steps, batch, features) and take the last timestep's output
    outputs = tf.transpose(outputs, [1, 0, 2])
    return tf.matmul(outputs[-1], _weights['out']) + _biases['out']
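# Shape walk-through (sketch, using the parameter values above): for a batch of
# 128 images, x is (128, 28, 28); the concatenated BiRNN outputs are
# (128, 28, 256); the last-timestep slice is (128, 256); the logits are (128, 10).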
pred = BiRNN(x, istate_fw, istate_bw, weights, biases, batch_size, n_steps)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# initialize_all_variables is deprecated; use global_variables_initializer
init = tf.global_variables_initializer()
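# TF 1.x execution model: the graph built above is only a description; it is
# actually executed inside the Session below.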
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until training_iters examples have been seen
    while step * batch_size < training_iters:
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # Reshape each flattened 784-pixel image to 28 rows of 28 pixels,
        # so each row is one timestep
        batch_xs = batch_xs.reshape(batch_size, n_steps, n_input)
        sess.run(optimizer, feed_dict={
            x: batch_xs, y: batch_ys,
            istate_fw: np.zeros((batch_size, 2 * n_hidden)),
            istate_bw: np.zeros((batch_size, 2 * n_hidden))})
        if step % display_step == 0:
            # Evaluate batch loss and accuracy in a single run
            feed = {x: batch_xs, y: batch_ys,
                    istate_fw: np.zeros((batch_size, 2 * n_hidden)),
                    istate_bw: np.zeros((batch_size, 2 * n_hidden))}
            loss, acc = sess.run([cost, accuracy], feed_dict=feed)
            print("Iter " + str(step * batch_size) +
                  ", Minibatch Loss= " + "{:.6f}".format(loss) +
                  ", Training Accuracy= " + "{:.5f}".format(acc))
        step += 1
    print("Optimization Finished!")
    # Calculate accuracy on 128 MNIST test images
    test_len = 128
    test_data = mnist.test.images[:test_len].reshape((-1, n_steps, n_input))
    test_label = mnist.test.labels[:test_len]
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={
              x: test_data, y: test_label,
              istate_fw: np.zeros((test_len, 2 * n_hidden)),
              istate_bw: np.zeros((test_len, 2 * n_hidden))}))
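    # A minimal prediction sketch (hypothetical addition, not part of the
    # original script): classify the first test image via the argmax of the logits.
    sample = mnist.test.images[:1].reshape((-1, n_steps, n_input))
    digit = sess.run(tf.argmax(pred, 1), feed_dict={
        x: sample,
        istate_fw: np.zeros((1, 2 * n_hidden)),  # unused by the dynamic-RNN path
        istate_bw: np.zeros((1, 2 * n_hidden))})
    print("Predicted digit for the first test image:", digit[0])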