25-Mnist04

本文介绍了一个基于TensorFlow的深度学习模型,用于识别MNIST数据集中的手写数字。模型采用卷积神经网络(CNN),通过多次迭代训练,实现了高精度的手写数字识别。
部署运行你感兴趣的模型镜像
from tensorflow.examples.tutorials.mnist.input_data import read_data_sets
import numpy as np
import cv2
import tensorflow as tf
# import os
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

class Config:
    """Hyper-parameters and filesystem locations for the ``mnist04`` run."""

    def __init__(self):
        # Directory handed to the MNIST loader.
        self.sample_path = '../deeplearning_ai12/p07_mnist/MNIST_data'

        # Optimisation settings.
        self.lr = 0.001          # learning rate given to the optimizer
        self.epoches = 200       # number of passes over the training split
        self.batch_size = 20     # examples fetched per step
        self.eps = 1e-10         # small positive constant for numerical safety
        self.base_filters = 16   # should be 32 at least

        # Checkpoint prefix has the form ../models/<name>/<name>.
        run_name = 'mnist04'
        self.name = run_name
        self.save_path = '../models/{name}/{name}'.format(name=run_name)


class Tensors:
    """Build the TF1 graph of the MNIST CNN: inputs, loss, train op, accuracy.

    Attributes set by ``__init__``:
        config:    the Config object the graph was built from.
        x:         float32 placeholder ``[None, 784]`` named ``x``.
        y:         int32 placeholder ``[None]`` named ``y`` (class ids).
        y_predict: int32 ``[None]``, argmax of the logits.
        loss:      scalar, mean cross-entropy over the batch.
        train_op:  Adam update minimising ``loss`` with ``config.lr``.
        precise:   scalar, fraction of ``y_predict`` equal to ``y``.
    """

    def __init__(self, config: Config):
        self.config = config
        self.x = tf.placeholder(tf.float32, [None, 784], 'x')
        images = tf.reshape(self.x, [-1, 28, 28, 1])  # [-1, 28, 28, 1]
        logits = self.get_logits(images)  # [-1, 10]
        self.y_predict = tf.argmax(logits, axis=1, output_type=tf.int32)  # [-1]

        self.y = tf.placeholder(tf.int32, [None], 'y')

        # Compute the cross-entropy directly from the logits.  The previous
        # softmax -> maximum(eps) -> log chain loses precision for confident
        # predictions, and the clamp passes no gradient for any probability
        # below eps, so badly misclassified examples stopped contributing.
        # The fused op is numerically stable and needs neither one_hot nor
        # config.eps.
        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.y, logits=logits)  # [-1]
        self.loss = tf.reduce_mean(per_example)
        opt = tf.train.AdamOptimizer(config.lr)
        self.train_op = opt.minimize(self.loss)

        self.precise = tf.reduce_mean(tf.cast(tf.equal(self.y, self.y_predict), tf.float32))

        # Print a per-variable and total trainable-parameter summary.
        params = 0
        for var in tf.trainable_variables():
            ps = _params(var.shape)
            print(var.name, var.shape, ps)
            params += ps
        print('-' * 200)
        print('Total:', params)

    def get_logits(self, x):
        """Map a batch of images to unnormalised class scores.

        With F = ``config.base_filters`` the network is:
        conv(F) -> [conv(2F), pool, relu] -> [conv(4F), pool, relu]
        -> flatten -> dense(1000, relu) -> dense(10).

        :param x: [-1, 28, 28, 1]
        :return: [-1, 10]
        """
        config = self.config
        filters = config.base_filters
        x = tf.layers.conv2d(x, filters, (3, 3), (1, 1), 'same',
                             activation=tf.nn.relu, name='conv1')  # [-1, 28, 28, F]
        for i in range(2):
            filters *= 2
            # 'same' convolution keeps the spatial size; only channels change.
            x = tf.layers.conv2d(x, filters, 3, 1, 'same',
                                 name='conv2_%d' % i)
            # Each 2x2 / stride-2 pooling halves height and width: 28 -> 14 -> 7.
            x = tf.layers.max_pooling2d(x, (2, 2), (2, 2), 'same')
            # ReLU after max-pooling gives the same result as before it
            # (both are monotonic) but runs on 4x fewer values.
            x = tf.nn.relu(x)

        # x: [-1, 7, 7, 4F]
        x = tf.layers.flatten(x)  # [-1, 7*7*4F]
        x = tf.layers.dense(x, 1000, activation=tf.nn.relu, name='dense1')
        x = tf.layers.dense(x, 10, name='dense2')  # [-1, 10]
        return x


def _params(shape):
    """Return the number of scalar elements described by ``shape``.

    :param shape: iterable of dimensions. Each item is either a plain int or
        an object exposing its size as ``.value`` (as TF1 ``Dimension`` does).
    :return: product of all dimension sizes; 1 for an empty shape.
    :raises TypeError: if a dimension size is ``None`` (unknown).
    """
    result = 1
    for dim in shape:
        # TF1 shapes iterate as Dimension objects (size in ``.value``), while
        # TF2-style shapes and ordinary tuples iterate as ints, which have no
        # ``.value`` attribute. Accept both instead of raising AttributeError.
        result *= getattr(dim, 'value', dim)
    return result


class Samples:
    """Load MNIST from ``config.sample_path`` and expose its three splits.

    Each of ``train``, ``validation`` and ``test`` is a ``SubSamples``
    wrapper around the corresponding split of the loaded dataset.
    """

    def __init__(self, config):
        datasets = read_data_sets(config.sample_path)

        # Wrap the splits in a fixed order: train, validation, test.
        self.train, self.validation, self.test = (
            SubSamples(split)
            for split in (datasets.train, datasets.validation, datasets.test)
        )


class SubSamples:
    """Thin adapter around a single dataset split."""

    def __init__(self, data):
        # Wrapped split; it must provide ``num_examples`` and ``next_batch``.
        self.data = data

    def num_examples(self):
        """Return the ``num_examples`` attribute of the wrapped split."""
        total = self.data.num_examples
        return total

    def next_batch(self, batch_size):
        """Fetch the next mini-batch from the wrapped split.

        :param batch_size: number of examples to request.
        :return: the value of ``data.next_batch(batch_size)``, unchanged.
        """
        # Per the original note: xs is [batch_size, 784], ys is [batch_size].
        batch = self.data.next_batch(batch_size)
        return batch


def _tile_imgs(xs, cols=20):
    """Arrange 28x28 images into one 2-D mosaic with ``cols`` images per row.

    :param xs: array-like reshapeable to ``[-1, 28, 28]`` (e.g. ``[N, 784]``).
    :param cols: images per mosaic row; must be >= 1.
    :return: array of shape ``[rows * 28, cols * 28]`` where
        ``rows = ceil(N / cols)``. Unused cells of the last row are zero.
    :raises ValueError: if ``cols`` is smaller than 1.
    """
    if cols < 1:
        raise ValueError('cols must be >= 1, got %r' % (cols,))

    imgs = np.reshape(xs, [-1, 28, 28])  # [N, 28, 28]
    count = imgs.shape[0]
    rows = -(-count // cols)  # ceiling division

    # Pad with blank images so the last row is complete; without this the
    # reshape below fails whenever N is not a multiple of cols.
    missing = rows * cols - count
    if missing:
        blanks = np.zeros((missing, 28, 28), dtype=imgs.dtype)
        imgs = np.concatenate([imgs, blanks], axis=0)

    # [rows, cols, 28, 28] -> [rows, 28, cols, 28]: within one mosaic row,
    # pixel row r of every image is laid side by side.
    grid = imgs.reshape(rows, cols, 28, 28).transpose(0, 2, 1, 3)
    return grid.reshape(rows * 28, cols * 28)


def show_imgs(xs, ys, cols=20):
    """Print the labels and display the images as a mosaic in an OpenCV window.

    Blocks in ``cv2.waitKey()`` until a key is pressed.

    :param xs: images, reshapeable to ``[-1, 28, 28]`` (e.g. ``[N, 784]``).
    :param ys: labels; only printed.
    :param cols: images per mosaic row (default 20, the previous fixed width).
    :raises ValueError: if ``cols`` is smaller than 1.
    """
    print(ys)
    mosaic = _tile_imgs(xs, cols)
    if mosaic.size == 0:
        # Nothing to draw; an empty image cannot be displayed.
        return

    cv2.imshow('My digits', mosaic)
    cv2.waitKey()


class App:
    """Own the dataset, graph, session and saver, and run the training loop.

    On construction the model is restored from ``config.save_path`` when a
    usable checkpoint exists; otherwise all variables are freshly initialised.
    Call ``close()`` when done to release the session.
    """

    def __init__(self, config: Config):
        """Load the samples, build the graph and restore or initialise the model.

        :param config: settings for data location, model and checkpoint path.
        """
        self.config = config
        self.samples = Samples(config)

        g = tf.Graph()
        with g.as_default():
            self.tensors = Tensors(config)
            self.session = tf.Session(graph=g)
            self.saver = tf.train.Saver()

            try:
                self.saver.restore(self.session, config.save_path)
            except (ValueError, tf.errors.OpError):
                # ValueError: the path is not a valid checkpoint (e.g. first run).
                # OpError: the checkpoint is unreadable or does not match the graph.
                # Anything else (KeyboardInterrupt, programming errors) propagates
                # instead of being silently treated as "no checkpoint".
                print('Fail to restore the model from %s, use a new model instead' % config.save_path)
                self.session.run(tf.global_variables_initializer())
            else:
                print('Restore the model from %s successfully' % config.save_path)

    def close(self):
        """Close the TensorFlow session."""
        self.session.close()

    def train(self):
        """Train for ``config.epoches`` epochs, checkpointing after each one.

        After every training step the accuracy is measured on one validation
        mini-batch and printed together with the training loss.
        """
        import os  # local import: the file only has ``import os`` commented out

        train_samples = self.samples.train
        config = self.config
        ts = self.tensors

        # Create the checkpoint directory up front, so a missing directory
        # cannot make the first save fail after a whole epoch of training.
        save_dir = os.path.dirname(config.save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

        # The number of steps per epoch does not change between epochs.
        batches = train_samples.num_examples() // config.batch_size

        for epoch in range(config.epoches):
            for batch in range(batches):
                xs, ys = train_samples.next_batch(config.batch_size)
                _, loss_v = self.session.run([ts.train_op, ts.loss], {ts.x: xs, ts.y: ys})

                # Accuracy on a single validation mini-batch: cheap but noisy.
                xs, ys = self.samples.validation.next_batch(config.batch_size)
                precise_v = self.session.run(ts.precise, {ts.x: xs, ts.y: ys})

                print('Epoch: %d, batch %d: loss=%.6f, precise=%.6f' % (epoch, batch, loss_v, precise_v))
            self.saver.save(self.session, config.save_path)
            print('Model saved into', config.save_path)
        print('Training is finished!')


if __name__ == '__main__':
    config = Config()
    app = App(config)

    # Release the session even when training raises or is interrupted
    # (for example Ctrl-C during the long training loop).
    try:
        app.train()
    finally:
        app.close()

您可能感兴趣的与本文相关的镜像

TensorFlow-v2.15

TensorFlow-v2.15

TensorFlow

TensorFlow 是由Google Brain 团队开发的开源机器学习框架,广泛应用于深度学习研究和生产环境。 它提供了一个灵活的平台,用于构建和训练各种机器学习模型

#z_dim(輸入維度): 32 #activation 函數: tanh #atch_size: 32 import numpy as np import matplotlib.pyplot as plt import tensorflow as tf from tensorflow.keras.layers import Dense, Flatten, Reshape from tensorflow.keras.models import Sequential # ---------- 參數設定 ---------- z_dim = 32 # 任務 1:改為32 batch_size = 32 # 任務 3:改為32 epochs = 1000 sample_interval = 500 half_batch = batch_size // 2 # ---------- 載入資料 ---------- (x_train, _), (_, _) = tf.keras.datasets.mnist.load_data() x_train = (x_train.astype(np.float32) - 127.5) / 127.5 x_train = x_train.reshape(x_train.shape[0], 28, 28, 1) # ---------- 建立 Generator ---------- def build_generator(): model = Sequential([ Dense(128, activation='tanh', input_shape=(z_dim,)), # 任務 2:tanh Dense(784, activation='tanh'), Reshape((28, 28, 1)) ]) return model # ---------- 建立 Discriminator ---------- def build_discriminator(): model = Sequential([ Flatten(input_shape=(28, 28, 1)), Dense(128, activation='tanh'), # 任務 2:tanh Dense(1, activation='sigmoid') ]) return model # ---------- 組合 GAN ---------- generator = build_generator() discriminator = build_discriminator() discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) discriminator.trainable = False gan = Sequential([generator, discriminator]) gan.compile(loss='binary_crossentropy', optimizer='adam') # ---------- 顯示生成圖片 ---------- def visualize_generated_images(generator, epoch, examples=25, dim=(5,5), figsize=(6,6)): noise = np.random.normal(0, 1, (examples, z_dim)) gen_imgs = generator.predict(noise) gen_imgs = 0.5 * gen_imgs + 0.5 fig, axs = plt.subplots(dim[0], dim[1], figsize=figsize) cnt = 0 for i in range(dim[0]): for j in range(dim[1]): axs[i,j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray') axs[i,j].axis('off') cnt += 1 plt.suptitle(f"Generated Images at Epoch {epoch}") plt.show() # ---------- 訓練 ---------- for epoch in range(epochs): # 判別器訓練 idx = np.random.randint(0, x_train.shape[0], half_batch) real_imgs = x_train[idx] noise = np.random.normal(0, 
1, (half_batch, z_dim)) fake_imgs = generator.predict(noise) d_loss_real = discriminator.train_on_batch(real_imgs, np.ones((half_batch, 1))) d_loss_fake = discriminator.train_on_batch(fake_imgs, np.zeros((half_batch, 1))) d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) # 生成器訓練 noise = np.random.normal(0, 1, (batch_size, z_dim)) g_loss = gan.train_on_batch(noise, np.ones((batch_size, 1))) # 顯示結果 if epoch % sample_interval == 0: print(f"{epoch} [D loss: {d_loss[0]:.4f}, acc.: {100*d_loss[1]:.2f}%] [G loss: {g_loss:.4f}]") visualize_generated_images(generator, epoch)圖片都雜訊
最新发布
05-10
<think>好的,用户的问题是关于使用TensorFlow的GAN生成MNIST图片时出现噪声的问题。我需要先理解GAN的基本结构,特别是生成器和判别器的训练过程。根据引用[1],判别器会接收真实图片和生成器产生的噪声信号,随着训练进行,生成器应该逐渐生成更真实的图片。如果生成器一直输出噪声,可能是训练不稳定或者模型结构有问题。 首先,我应该检查生成器的结构是否合适。MNIST是28x28的灰度图,生成器的输出层应该使用tanh或者sigmoid激活函数,确保像素值在合理范围内。如果使用线性激活,可能导致输出范围不合适,出现噪声。 然后,训练过程中的损失函数设计也很重要。生成器和判别器的损失需要平衡,如果判别器太强,生成器无法有效学习。可能需要调整学习率或者使用不同的优化器,比如Adam,并设置适当的学习率,例如0.0002,避免梯度消失或爆炸。 另外,批次归一化层可以帮助稳定训练,但如果在生成器的输出层使用可能会引入噪声,应该避免。此外,训练轮数可能不足,GAN通常需要较多的训练轮次才能生成清晰的图片,用户可能需要增加epochs。 还有,输入潜在向量的维度是否合适?通常使用100维的随机向量作为输入,如果维度太小,生成器可能无法捕捉足够的特征。同时,检查训练数据是否正常,数据预处理是否正确,比如归一化到[-1,1]或[0,1]之间。 引用[3]提到对抗训练和随机平滑的方法,虽然主要是针对鲁棒性模型,但其中提到的训练技巧可能对稳定GAN训练有帮助。例如,使用梯度惩罚或者不同的损失函数,如Wasserstein损失,可能改善生成效果。 最后,建议用户逐步调试,比如先简化模型结构,确保生成器能输出基本形状,再逐步增加复杂度。可视化训练过程中的生成图片,观察是否有逐渐改善的趋势,如果没有,可能需要调整超参数或模型架构。</think>针对TensorFlow GAN生成MNIST图片出现噪声的问题,以下是系统性解决方案: ### 一、模型结构优化 1. **生成器架构改进** - 输出层使用$tanh$激活函数(输出范围$[-1,1]$),与归一化后的输入数据匹配 - 添加批量归一化层(BatchNormalization)加速收敛 ```python def build_generator(): model = Sequential([ Dense(128 * 7 * 7, input_dim=100), # 输入维度建议100-200 Reshape((7,7,128)), BatchNormalization(), Conv2DTranspose(64, (5,5), strides=2, padding='same', activation='relu'), BatchNormalization(), Conv2DTranspose(1, (5,5), strides=2, padding='same', activation='tanh') ]) return model ``` 2. **判别器架构调整** - 使用LeakyReLU(斜率0.2)防止梯度消失 - 添加Dropout层(0.3-0.5)防止过拟合 ```python def build_discriminator(): model = Sequential([ Conv2D(64, (5,5), strides=2, padding='same', input_shape=(28,28,1)), LeakyReLU(0.2), Dropout(0.3), Conv2D(128, (5,5), strides=2, padding='same'), LeakyReLU(0.2), Dropout(0.3), Flatten(), Dense(1, activation='sigmoid') ]) return model ``` ### 二、训练策略优化 1. **损失函数选择** - 使用Wasserstein损失配合梯度惩罚(WGAN-GP)提升训练稳定性 $$L = \mathbb{E}_{\tilde{x} \sim \mathbb{P}_g}[D(\tilde{x})] - \mathbb{E}_{x \sim \mathbb{P}_r}[D(x)] + \lambda \mathbb{E}_{\hat{x} \sim \mathbb{P}_{\hat{x}}}[(||\nabla_{\hat{x}}D(\hat{x})||_2 - 1)^2]$$ 引用[3]中提到的对抗训练方法可增强模型鲁棒性 2. 
**学习率设置** - 生成器学习率(2e-4)略大于判别器(1e-4) - 使用Adam优化器(beta_1=0.5, beta_2=0.999) ### 三、数据预处理 1. **标准化处理** - 将像素值从$[0,255]$线性映射到$[-1,1]$,与生成器输出范围对齐 ```python (train_images, _), (_, _) = tf.keras.datasets.mnist.load_data() train_images = train_images.reshape(-1,28,28,1).astype('float32') train_images = (train_images - 127.5) / 127.5 # 归一化到[-1,1] ``` ### 四、训练过程监控 1. **可视化中间结果** ```python def generate_and_save_images(epoch, test_input): predictions = generator(test_input, training=False) plt.figure(figsize=(4,4)) for i in range(predictions.shape[0]): plt.subplot(4,4,i+1) plt.imshow(predictions[i,:,:,0]*127.5+127.5, cmap='gray') plt.axis('off') plt.savefig('image_at_epoch_{:04d}.png'.format(epoch)) ``` 2. **训练指标监控** - 判别器损失应保持在0.6-0.8之间 - 生成器损失应稳步下降 ### 五、典型问题诊断 | 现象 | 可能原因 | 解决方案 | |-------|---------|---------| | 生成全灰图像 | 模式崩溃 | 增加噪声输入维度,添加minibatch discrimination | | 黑白点状噪声 | 梯度爆炸 | 降低学习率,添加梯度裁剪 | | 模糊图像 | 生成器能力不足 | 增加网络深度,使用残差连接 |
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值