An example of reading CSV files with TensorFlow's shuffle_batch

This post walks through a simple example of reading files in TensorFlow and using shuffle_batch to draw randomized batches from a dataset. The key steps are defining the record format, creating a filename queue, and setting the batching parameters.


The code below is a minimal demonstration of reading files with shuffling in TensorFlow.

Code

#coding=utf-8

import tensorflow as tf
import numpy as np

def readMyFileFormat(fileNameQueue):
    reader = tf.TextLineReader()
    key, value = reader.read(fileNameQueue)

    # One default value per CSV column; this also fixes each column's dtype (int32 here).
    record_defaults = [[1], [1], [1]]
    col1, col2, col3 = tf.decode_csv(value, record_defaults=record_defaults)
    features = tf.stack([col1, col2])  # tf.pack was renamed to tf.stack in TF 1.0
    label = col3
    return features, label

def inputPipeLine(fileNames=["file0.csv", "file1.csv"], batchSize=4, numEpochs=None):
    fileNameQueue = tf.train.string_input_producer(fileNames, num_epochs=numEpochs)
    example, label = readMyFileFormat(fileNameQueue)
    # min_after_dequeue controls how well the examples are mixed: larger means
    # better shuffling but more memory and a slower start. The TF docs recommend
    # capacity = min_after_dequeue + (num_threads + a small margin) * batch_size.
    min_after_dequeue = 8
    capacity = min_after_dequeue + 3 * batchSize
    exampleBatch, labelBatch = tf.train.shuffle_batch(
        [example, label], batch_size=batchSize, num_threads=3,
        capacity=capacity, min_after_dequeue=min_after_dequeue)
    return exampleBatch, labelBatch

featureBatch, labelBatch = inputPipeLine(["file0.csv", "file1.csv"], batchSize=4)
with tf.Session() as sess:
    # Start populating the filename queue.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    # Retrieve batches until the coordinator signals a stop:
    try:
        while not coord.should_stop():
            example, label = sess.run([featureBatch, labelBatch])
            print(example)
    except tf.errors.OutOfRangeError:
        print('Done reading')
    finally:
        coord.request_stop()

    coord.join(threads)
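Note that with numEpochs=None the input producer cycles over the files forever, so OutOfRangeError is never raised and the loop above runs indefinitely. If you want a finite number of passes, keep in mind that string_input_producer implements num_epochs with a local counter variable, which must be initialized before the queue runners start. A minimal sketch of a bounded run, reusing the same inputPipeLine as above:

featureBatch, labelBatch = inputPipeLine(["file0.csv", "file1.csv"], batchSize=4, numEpochs=2)
with tf.Session() as sess:
    # num_epochs creates a local variable; it must be initialized explicitly.
    sess.run(tf.local_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    try:
        while not coord.should_stop():
            example, label = sess.run([featureBatch, labelBatch])
            print(example)
    except tf.errors.OutOfRangeError:
        print('Done reading')  # raised after two full passes over the files
    finally:
        coord.request_stop()
    coord.join(threads)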

Contents of file0.csv

9,1,1
10,2,3
11,3,1
12,4,2

Contents of file1.csv

1,1,7
2,2,8
3,3,5
4,4,9
5,5,5
6,6,1
7,7,2
8,8,4
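For readers on current TensorFlow: the queue-runner API used above was deprecated in favor of tf.data and does not exist in TF 2.x's eager mode. Below is a rough equivalent of the same pipeline written with tf.data; this is just a sketch assuming TensorFlow 2.x, and the helper name parse_line is illustrative rather than part of the original example.

import tensorflow as tf

def parse_line(line):
    # Same record_defaults as above: three int32 columns per CSV line.
    col1, col2, col3 = tf.io.decode_csv(line, record_defaults=[[1], [1], [1]])
    return tf.stack([col1, col2]), col3

dataset = (tf.data.TextLineDataset(["file0.csv", "file1.csv"])
           .map(parse_line)
           .shuffle(buffer_size=8)  # plays the role of min_after_dequeue
           .batch(4))

for features, labels in dataset:
    print(features.numpy())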