关于变长数据的 padding:TensorFlow 2.0 以后推荐使用 tf.data.Dataset,所以试着把白裳的 CRNN 的 tfrecord 读取部分改写一下。比较坑的是,以前的 tf.train.batch() 函数在输入中含有 SparseTensor 时可以自动 padding(dynamic_pad=True),但 dataset.padded_batch 不支持 SparseTensor,需要先用 tf.sparse_tensor_to_dense() 把它转为普通的 dense tensor 再做 padded_batch;如果后面(例如 CTC loss)还需要 SparseTensor,再把 padding 之后的 dense tensor 转换回 SparseTensor。
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import json
import time as time
import tensorflow as tf
import numpy as np
from crnn_model import model
# os.environ["CUDA_VISIBLE_DEVICES"]="0"
# ------------------------------------Basic parameters------------------------------------
tf.app.flags.DEFINE_string(
    'data_dir', '../tfrecords/', 'Path to the directory containing data tf record.')

# The input pipeline reads FLAGS.batch_size and the training loop reads
# FLAGS.max_train_steps; both must be declared or flag lookup fails at runtime.
tf.app.flags.DEFINE_integer(
    'batch_size', 32, 'The number of samples in each batch.')

tf.app.flags.DEFINE_integer(
    'max_train_steps', 20000, 'The maximum number of training steps to run.')

# -------------------------------------for sparse_tensor----------------------------------
# Sentinel value used to pad dense label batches; entries equal to it are
# dropped again when the batch is converted back to a SparseTensor, so it must
# not collide with any real label id.
tf.app.flags.DEFINE_integer(
    'sparse_num', 10000, 'for sparse_tensor num')

FLAGS = tf.app.flags.FLAGS
def _parse_function(serialized_example):
    """Decode one serialized ``tf.train.Example`` into CRNN input tensors.

    Args:
        serialized_example: scalar string tensor holding one TFRecord entry
            with the features ``images`` (JPEG bytes), ``labels``
            (variable-length int64 list) and ``imagenames`` (string).

    Returns:
        A tuple ``(images, labels, sequence_length, labels_length)``:
            images: float32 tensor with static shape ``[32, None, 3]``
                (the width is dynamic).
            labels: dense 1-D int64 tensor converted from the sparse
                ``labels`` feature.
            sequence_length: int32 scalar, the image width divided by 4.
            labels_length: int32 tensor of shape ``[1]`` holding the number
                of label entries.
    """
    features = tf.parse_single_example(
        serialized_example,
        features={
            'images': tf.FixedLenFeature([], tf.string),
            # Labels are variable length, so they are parsed as a SparseTensor.
            'labels': tf.VarLenFeature(tf.int64),
            'imagenames': tf.FixedLenFeature([], tf.string),
        })

    # Force 3 output channels so that the static shape declared below is
    # actually true at runtime; without it a grayscale JPEG decodes to a
    # single channel and would be silently zero-padded when batching.
    images = tf.image.decode_jpeg(features['images'], channels=3)
    images.set_shape([32, None, 3])  # dynamic width
    images = tf.cast(images, tf.float32)

    # padded_batch cannot pad SparseTensors, so convert to a dense tensor.
    labels = tf.sparse_tensor_to_dense(features['labels'])
    labels_length = tf.shape(labels)

    # The VGG backbone shrinks the feature map width by a factor of 4, so this
    # value can be fed directly as the sequence length.
    sequence_length = tf.cast(tf.shape(images)[-2] // 4, tf.int32)

    return images, labels, sequence_length, labels_length
def _train_crnn_ctc():
    """Build the padded-batch input pipeline and pull batches from it.

    Reads ``train.tfrecord`` from ``FLAGS.data_dir``, decodes every record
    with ``_parse_function``, batches the variable-length tensors with
    ``padded_batch`` and converts the padded dense labels back into a
    ``tf.SparseTensor``. It then runs up to ``FLAGS.max_train_steps`` fetches
    in a session, recording how long each fetch takes.
    """
    tfrecord_path = os.path.join(FLAGS.data_dir, 'train.tfrecord')

    # Read the serialized examples and decode each one into
    # (image, dense labels, sequence length, label length).
    dataset = tf.data.TFRecordDataset(tfrecord_path)
    dataset = dataset.map(_parse_function)
    dataset = dataset.repeat(10000)
    # dataset = dataset.shuffle(buffer_size=1000)

    # Target shape of every component after padding; None means "pad to the
    # longest element in the batch".
    padded_shapes = (
        tf.TensorShape([32, None, 3]),
        tf.TensorShape([None]),
        tf.TensorShape([]),
        tf.TensorShape([1]),
    )
    # Labels are padded with the FLAGS.sparse_num sentinel so the padding can
    # be stripped again when converting back to a SparseTensor below.
    padding_values = (
        tf.constant(0, dtype=tf.float32),
        tf.constant(FLAGS.sparse_num, dtype=tf.int64),
        tf.constant(32, dtype=tf.int32),
        tf.constant(0, dtype=tf.int32),
    )
    dataset = dataset.padded_batch(
        FLAGS.batch_size,
        padded_shapes=padded_shapes,
        padding_values=padding_values)
    iterator = dataset.make_initializable_iterator()

    # NOTE: the older approach was tf.train.batch(..., dynamic_pad=True),
    # which pads SparseTensors automatically; padded_batch does not.

    # Call get_next() exactly once: every call adds a separate op to the graph.
    next_element = iterator.get_next()
    print(next_element)
    imgs_tensor, label_dense_tensor, seq_lens_tensor, _label_length_tensor = next_element

    # ---------------- convert the padded dense labels to a SparseTensor ----
    # Every entry different from the padding sentinel is kept as a value.
    padding_marker = tf.constant(FLAGS.sparse_num, dtype=tf.int64)
    indices = tf.where(tf.not_equal(label_dense_tensor, padding_marker))
    values = tf.gather_nd(label_dense_tensor, indices)
    sparse = tf.SparseTensor(
        indices,
        values,
        dense_shape=tf.shape(label_dense_tensor, out_type=tf.int64))
    # -----------------------------------------------------------------------

    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer())

    with tf.Session() as sess:
        # init all variables
        sess.run(init_op)
        sess.run(iterator.initializer)

        step_durations = []
        for _ in range(FLAGS.max_train_steps):
            started = time.time()
            try:
                # Fetch one batch: images, sparse labels, sequence lengths.
                imgs, lbls, seq_lens = sess.run(
                    [imgs_tensor, sparse, seq_lens_tensor])
            except tf.errors.OutOfRangeError:
                # The dataset ran out before max_train_steps was reached.
                break
            step_durations.append(time.time() - started)

        if step_durations:
            print('fetched %d batches, mean time per batch: %.4f s' % (
                len(step_durations),
                sum(step_durations) / len(step_durations)))
参考链接:
dataset.padded_batch 用法案例
用tensorflow DataSet高效加载变长文本输入
sparse-matrix-from-a-dense-one-tensorflow
converting-tensor-to-a-sparsetensor-for-ctc-loss
tf.gather