#encoding:utf-8
import numpy as np
import struct
import gzip
# Locations of the gzip-compressed MNIST files. These four names are the
# default arguments of load_mnist_data, in this order:
# a -> train_images, b -> train_labels, c -> test_images, d -> test_labels.
a = 'D:/src/tensorflow/data_sets/MNIST_data/train-images-idx3-ubyte.gz'
b = 'D:/src/tensorflow/data_sets/MNIST_data/train-labels-idx1-ubyte.gz'
c = 'D:/src/tensorflow/data_sets/MNIST_data/t10k-images-idx3-ubyte.gz'
d = 'D:/src/tensorflow/data_sets/MNIST_data/t10k-labels-idx1-ubyte.gz'
def decode_idx3_ubyte(idx3_ubyte_file):
    """Decode a gzip-compressed IDX3 unsigned-byte image file.

    The data starts with four big-endian 32-bit integers (magic number,
    image count, row count, column count), followed by one unsigned byte
    per pixel, stored image after image.

    Args:
        idx3_ubyte_file: Path of the gzip-compressed file to read.

    Returns:
        A float64 ``numpy.ndarray`` of shape
        ``(num_images, num_rows, num_cols)`` holding the pixel values.

    Raises:
        struct.error: If the data is too short for the header, or holds
            fewer pixel bytes than the header announces.
        ValueError: If the header announces a negative dimension.
    """
    # The context manager closes the handle even when reading fails.
    with gzip.open(idx3_ubyte_file, 'rb') as fp:
        bin_data = fp.read()

    fmt_header = '>iiii'
    magic_number, num_images, num_rows, num_cols = struct.unpack_from(
        fmt_header, bin_data, 0)
    # The separator keeps the size readable (e.g. "28*28", not "2828").
    print('魔数:%d, 图片数量:%d张,图片大小:%d*%d'
          % (magic_number, num_images, num_rows, num_cols))
    offset = struct.calcsize(fmt_header)

    # np.empty defaults to float64 and rejects negative dimensions.
    images = np.empty((num_images, num_rows, num_cols))
    if len(bin_data) - offset < images.size:
        # struct.error is kept so callers see the same exception type the
        # per-image struct unpacking used to raise on truncated data.
        raise struct.error(
            'expected %d pixel bytes after the header, found %d'
            % (images.size, len(bin_data) - offset))

    # Decode every pixel in one pass instead of unpacking image by image.
    pixels = np.frombuffer(
        bin_data, dtype=np.uint8, count=images.size, offset=offset)
    images[...] = pixels.reshape(images.shape)

    # Emit the same milestone lines as before, one per 10000 images.
    for parsed in range(10000, num_images + 1, 10000):
        print('已解析 %d' % parsed + '张')
    return images
def decode_idx1_ubyte(idx1_ubyte_file):
    """Decode a gzip-compressed IDX1 unsigned-byte label file.

    The data starts with two big-endian 32-bit integers (magic number and
    item count), followed by one unsigned byte per label.

    Args:
        idx1_ubyte_file: Path of the gzip-compressed file to read.

    Returns:
        A float64 ``numpy.ndarray`` of shape ``(num_images,)`` holding the
        label values.

    Raises:
        struct.error: If the data is too short for the header, or holds
            fewer label bytes than the header announces.
        ValueError: If the header announces a negative item count.
    """
    # The context manager closes the handle even when reading fails.
    with gzip.open(idx1_ubyte_file, 'rb') as fp:
        bin_data = fp.read()

    fmt_header = '>ii'
    magic_number, num_images = struct.unpack_from(fmt_header, bin_data, 0)
    print('魔数:%d, 图片数量:%d张' % (magic_number, num_images))
    offset = struct.calcsize(fmt_header)

    # np.empty defaults to float64 and rejects a negative length.
    labels = np.empty(num_images)
    if len(bin_data) - offset < num_images:
        # struct.error is kept so callers see the same exception type the
        # per-label struct unpacking used to raise on truncated data.
        raise struct.error(
            'expected %d label bytes after the header, found %d'
            % (num_images, len(bin_data) - offset))

    # Decode every label in one pass instead of unpacking one at a time.
    labels[...] = np.frombuffer(
        bin_data, dtype=np.uint8, count=num_images, offset=offset)

    # Emit the same milestone lines as before, one per 10000 labels.
    for parsed in range(10000, num_images + 1, 10000):
        print('已解析 %d' % parsed + '张')
    return labels
def load_mnist_data(train_images=a,train_labels=b,test_images=c,test_labels=d):
    """Decode the local MNIST training and test files.

    Each images path is passed to decode_idx3_ubyte and each labels path
    to decode_idx1_ubyte: training images, training labels, test images,
    then test labels.

    Returns:
        ``((train_images, train_labels), (test_images, test_labels))``
        built from the decoded results.
    """
    training_pair = (
        decode_idx3_ubyte(train_images),
        decode_idx1_ubyte(train_labels),
    )
    testing_pair = (
        decode_idx3_ubyte(test_images),
        decode_idx1_ubyte(test_labels),
    )
    return training_pair, testing_pair