tfrecord_CRNN_使用-CSDN博客

本文链接：https://blog.csdn.net/loovelj/article/details/100321320

关于变长数据的padding：由于TensorFlow 2.0以后推荐使用tf.data.Dataset，所以试着把白裳的CRNN的tfrecord读取部分改写一下。比较坑的是，以前的tf.train.batch()函数在输入里有SparseTensor时可以自动padding；但是改用dataset.padded_batch之后就不支持这个功能了，需要先用tf.sparse_tensor_to_dense()把SparseTensor转为普通的dense tensor，之后如果需要，再转换回SparseTensor。

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time
import json
import time as time
import tensorflow as tf

import numpy as np

from crnn_model import model

# os.environ["CUDA_VISIBLE_DEVICES"]="0"

# ------------------------------------Basic parameters------------------------------------
# Directory that is joined with 'train.tfrecord' to locate the training data.
tf.app.flags.DEFINE_string(
    'data_dir', '../tfrecords/', 'Path to the directory containing data tf record.')

#-------------------------------------for sparse_tensor----------------------------------
# Sentinel label value: used as the padding value for the dense label vectors
# in padded_batch and filtered out again when the SparseTensor is rebuilt.
# NOTE(review): assumes no real label id ever equals this value -- confirm
# against the size of the character set.
tf.app.flags.DEFINE_integer(
    'sparse_num', 10000, 'for sparse_tensor num')

# Module-level handle through which the flag values are read.
FLAGS = tf.app.flags.FLAGS

def _parse_function(serialized_example):
    """Parse one serialized Example into the tensors used for CRNN/CTC training.

    Args:
        serialized_example: scalar string tensor holding one serialized
            Example with the features 'images' (encoded JPEG bytes),
            'labels' (variable-length int64 list) and 'imagenames' (string).

    Returns:
        A tuple ``(images, labels, sequence_length, labels_length)``:
            images: float32 image tensor with static shape [32, None, 3]
                (the height of 32 is asserted via ``set_shape``, not resized).
            labels: dense int64 label vector converted from the sparse feature.
            sequence_length: int32 scalar, the image width divided by 4.
            labels_length: int32 tensor of shape [1] holding the label count.
    """
    features = tf.parse_single_example(
        serialized_example,
        features={
            'images': tf.FixedLenFeature([], tf.string),
            # Labels are variable-length, so they are parsed as a SparseTensor.
            'labels': tf.VarLenFeature(tf.int64),
            'imagenames': tf.FixedLenFeature([], tf.string),
        })

    # Force three output channels: without `channels=3` a grayscale JPEG
    # decodes to a single channel, which contradicts the static shape set
    # below and breaks batching against a [32, None, 3] padded shape.
    images = tf.image.decode_jpeg(features['images'], channels=3)
    images.set_shape([32, None, 3])  # fixed height, dynamic width
    images = tf.cast(images, tf.float32)

    # padded_batch cannot pad SparseTensors, so convert to a dense tensor here.
    labels = tf.sparse_tensor_to_dense(features['labels'])
    labels_length = tf.shape(labels)

    # The VGG backbone shrinks the width by a factor of 4, so the feature-map
    # sequence length can be computed up front and fed in directly.
    sequence_length = tf.cast(tf.shape(images)[-2] // 4, tf.int32)

    return images, labels, sequence_length, labels_length

def _train_crnn_ctc():
    tfrecord_path = os.path.join(FLAGS.data_dir, 'train.tfrecord')
    # Creates a dataset that reads all of the examples from two files, and extracts
    # the image and label features.
    dataset = tf.data.TFRecordDataset(tfrecord_path)
    dataset = dataset.map(_parse_function)
    dataset = dataset.repeat(10000)

    # dataset = dataset.shuffle(buffer_size=1000)

    padded_shapes = (          #定义变量shape
        tf.TensorShape([32, None, 3]),
        tf.TensorShape([None]),
        tf.TensorShape([]),
        tf.TensorShape([1])
    )

    dataset = dataset.padded_batch(FLAGS.batch_size,
                                   # padded_shapes=dataset.output_shapes,
                                   padded_shapes=padded_shapes,
                                   padding_values=(tf.constant(0, dtype=tf.float32),  #定义常量
                                                   tf.constant(FLAGS.sparse_num, dtype=tf.int64),
                                                   tf.constant(32, dtype=tf.int32),
                                                   tf.constant(0, dtype=tf.int32)))

    iterator = dataset.make_initializable_iterator()

    # # decode the training data from tfrecords       #这是以前的方法，支持sparestensor动态调整，不过tensorflow2.0将不能用                
    # batch_images, batch_labels, batch_sequence_lengths = tf.train.batch(
    #     tensors=[images, labels, sequence_lengths], batch_size=FLAGS.batch_size, dynamic_pad=True,
    #     capacity=1000 + 2*FLAGS.batch_size, num_threads=FLAGS.num_threads)

    with tf.Session() as sess:
     
        # init all variables
        sess.run(init_op)

        sess.run(iterator.initializer)
        next_element = iterator.get_next()
        print(next_element)

        imgs_tensor, lable_dense_tensor, seq_lens_tensor,lable_legth_tensor = iterator.get_next()

#----------------转为SapresTensor--------------------------------------
        dense = lable_dense_tensor
        zero = tf.constant(FLAGS.sparse_num, dtype=tf.int64)
        where = tf.not_equal(dense, zero)
        indices = tf.where(where)
        values = tf.gather_nd(dense, indices)
        sparse = tf.SparseTensor(indices, values, dense_shape=tf.shape(dense, out_type=tf.int64))
#-----------------------------------------------------------------------
        time_line=[]
        for step in range(FLAGS.max_train_steps):

            first = time.time()
            imgs,lbls, seq_lens = sess.run([imgs_tensor,sparse, seq_lens_tensor])  #获取变量