【TFRecord】-tensorflow
0.难点说明
MNIST 数据集(input_data.read_data_sets 的默认划分)中:
train 55000
test 10000
valid 5000
1.将MNIST数据集转化为TFRecord格式
import numpy as np
import tensorflow as tf
import os
from tensorflow.examples.tutorials.mnist import input_data
def _int64_feature(value):
    """Build an integer-valued feature.

    Wraps ``value`` in a one-element ``tf.train.Int64List`` and returns
    the ``tf.train.Feature`` that carries it.
    """
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)
def _bytes_feature(value):
    """Build a bytes/string-valued feature.

    Wraps ``value`` in a one-element ``tf.train.BytesList`` and returns
    the ``tf.train.Feature`` that carries it.
    """
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)
def convert_to(mnist, name):
    """Write the training split of ``mnist`` to ``<name>.tfrecords``.

    Every example is packed into a ``tf.train.Example`` protocol buffer,
    serialized to a string, and appended to the TFRecord file. Each record
    carries three features:

    * ``pixels``    -- ``images.shape[1]``, the length of one image row.
    * ``label``     -- ``np.argmax`` of the example's label row
                       (presumably one-hot encoded; confirm with the caller).
    * ``image_raw`` -- the raw bytes of the example's image row.

    Args:
        mnist: Dataset object exposing ``train.images``, ``train.labels``
            and ``train.num_examples``.
        name: Output file stem; ``'.tfrecords'`` is appended to it.
    """
    images = mnist.train.images
    labels = mnist.train.labels
    pixels = images.shape[1]
    num_examples = mnist.train.num_examples
    filename = name + '.tfrecords'

    # The context manager closes the writer even if building or writing
    # an example raises part-way through the loop.
    with tf.python_io.TFRecordWriter(filename) as writer:
        for i in range(num_examples):
            # tobytes() replaces the deprecated ndarray.tostring() alias.
            image_raw = images[i].tobytes()
            example = tf.train.Example(features=tf.train.Features(
                feature={
                    'pixels': _int64_feature(pixels),
                    'label': _int64_feature(np.argmax(labels[i])),
                    'image_raw': _bytes_feature(image_raw),
                }))
            writer.write(example.SerializeToString())
def main(argv=None):
    """Program entry point: load MNIST and convert it to a TFRecord file.

    Calls ``input_data.read_data_sets`` on the ``MNIST_data/`` directory
    (per the original author's note, the data is downloaded automatically
    on initialization), then hands the result to ``convert_to`` with the
    output stem ``'train'``.

    Args:
        argv: Unused by this function.
    """
    mnist = input_data.read_data_sets('MNIST_data/', dtype=tf.uint8, one_hot=True)
    # Identity comparison is the correct way to test against None.
    if mnist is not None:
        print("------------数据加载完毕----------------")
        convert_to(mnist, 'train')
if __name__ == '__main__':
    # Run through tf.app.run, naming the entry point explicitly.
    tf.app.run(main=main)
2.读取TFRecord文件中的数据
import tensorflow as tf
# Read examples back from 'train.tfrecords' and print the first ten labels.

# Reader that pulls serialized examples from a queue of file names.
reader = tf.TFRecordReader()
filename_queue = tf.train.string_input_producer(['train.tfrecords'])
_, serialized_example = reader.read(filename_queue)

# Parse one serialized example using the same feature keys that were written.
features = tf.parse_single_example(
    serialized_example,
    features={
        'image_raw': tf.FixedLenFeature([], tf.string),
        'pixels': tf.FixedLenFeature([], tf.int64),
        'label': tf.FixedLenFeature([], tf.int64)
    })

# Decode the raw image bytes as uint8 and cast the integer features to int32.
images = tf.decode_raw(features['image_raw'], tf.uint8)
labels = tf.cast(features['label'], tf.int32)
pixels = tf.cast(features['pixels'], tf.int32)

sess = tf.Session()
coord = tf.train.Coordinator()
# Queue-runner threads feed the file-name queue; without them sess.run blocks.
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
try:
    for i in range(10):
        image, label, pixel = sess.run([images, labels, pixels])
        print(label)
finally:
    # Stop and join the background threads, then release the session,
    # even if reading fails.
    coord.request_stop()
    coord.join(threads)
    sess.close()
参考
- 【TensorFlow】数据处理(将MNIST转为TFRecord)