验证码识别CNN训练模型代码

最新推荐文章于 2022-01-29 21:19:01 发布

原创最新推荐文章于 2022-01-29 21:19:01 发布 · 369 阅读

1 ·

CC 4.0 BY-SA版权

Author:baiyun ,Email:mitbaiyun@163.com

深度学习专栏收录该内容

16 篇文章

订阅专栏

本文介绍了一种使用卷积神经网络(CNN)进行图像验证码识别的方法。通过搭建包含卷积层、池化层及全连接层的神经网络模型，并采用TensorFlow实现训练过程。实验中使用的验证码图像大小为60x160像素，由数字和大小写字母组成。

由于机器性能较弱，模型无法设置太多参数，导致梯度下降难以收敛。

在这里插入图片描述

import numpy as np
import  random
from PIL import Image
import matplotlib.pyplot as plt
import tensorflow as tf
def code_cnn(x, y):
    """Build the captcha CNN graph and return its softmax output.

    Layer order:
    conv -> relu6 -> max_pool -> conv -> relu6 -> max_pool -> dropout
    -> conv -> relu6 -> max_pool -> fully connected -> fully connected -> softmax

    Args:
        x: 4-D image tensor. The size of its last axis is used as the input
            channel count of the first convolution, and the remaining
            non-batch axes must be statically known so the first fully
            connected layer can be sized.
        y: Label tensor. It is not used while building the graph; the
            parameter is kept so existing callers keep working.

    Returns:
        Softmax activations of the final 4-unit layer.
    """
    with tf.variable_scope('net'):
        in_channels = x.get_shape().as_list()[3]
        with tf.variable_scope('conv1', initializer=tf.random_normal_initializer(0, 0.1), dtype=tf.float32):
            kernel_size_1 = 1
            w = tf.get_variable('w', shape=[3, 3, in_channels, kernel_size_1])
            b = tf.get_variable('b', shape=[kernel_size_1])
            net = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
            net = tf.nn.bias_add(net, b)
        with tf.variable_scope('relu1'):
            net = tf.nn.relu6(net)
        with tf.variable_scope('max_pool1'):
            # The pooled tensor must be assigned back; otherwise the op is
            # created but never becomes part of the forward pass.
            net = tf.nn.max_pool(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        with tf.variable_scope('conv2'):
            kernel_size_2 = 1
            w = tf.get_variable('w', shape=[3, 3, kernel_size_1, kernel_size_2])
            b = tf.get_variable('b', shape=[kernel_size_2])
            net = tf.nn.conv2d(net, w, strides=[1, 1, 1, 1], padding='SAME')
            net = tf.nn.bias_add(net, b)
        with tf.variable_scope('relu2'):
            net = tf.nn.relu6(net)
        with tf.variable_scope('max_pool2'):
            net = tf.nn.max_pool(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        with tf.variable_scope('dropout1'):
            # NOTE(review): keep_prob is a constant, so dropout is applied on
            # every run of the graph, including evaluation -- confirm whether
            # it should be switchable.
            net = tf.nn.dropout(net, keep_prob=0.75)
        with tf.variable_scope('conv3'):
            kernel_size_3 = 64
            w = tf.get_variable('w', shape=[11, 11, kernel_size_2, kernel_size_3])
            b = tf.get_variable('b', shape=[kernel_size_3])
            net = tf.nn.conv2d(net, w, strides=[1, 2, 2, 1], padding='SAME')
            net = tf.nn.bias_add(net, b)
        with tf.variable_scope('relu3'):
            net = tf.nn.relu6(net)
        with tf.variable_scope('max_pool3'):
            net = tf.nn.max_pool(net, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        with tf.variable_scope('fc1'):
            unit_number_1 = 124
            height, width, channels = net.get_shape().as_list()[1:]
            net_sample_feature_number = height * width * channels
            # -1 lets TensorFlow infer the batch axis; each row then holds the
            # flattened features of one sample.
            net = tf.reshape(net, shape=[-1, net_sample_feature_number])
            w = tf.get_variable('w', shape=[net_sample_feature_number, unit_number_1])
            b = tf.get_variable('b', shape=[unit_number_1])
            net = tf.add(tf.matmul(net, w), b)
        with tf.variable_scope('softmax'):
            unit_number_3 = 4
            w = tf.get_variable('w', shape=[unit_number_1, unit_number_3])
            b = tf.get_variable('b', shape=[unit_number_3])
            net = tf.add(tf.matmul(net, w), b)
            act = tf.nn.softmax(net)
    return act
def random_next_batch(batch_size=1, code_size=4):  # large batches can run out of memory
    """Generate a batch of random captcha images with their label indices.

    Args:
        batch_size: Number of captchas to generate.
        code_size: Number of characters in each captcha.

    Returns:
        A pair of numpy arrays: the generated images, and for each image the
        list of indices obtained by looking up every character of its text in
        code_char_2number_dict.
    """
    samples = [generate_code_image(code_size) for _ in range(batch_size)]
    images = [image for _, image in samples]
    labels = [[code_char_2number_dict[ch] for ch in text] for text, _ in samples]
    return np.array(images), np.array(labels)
from captcha.image import ImageCaptcha

# Alphabet the captcha text is drawn from: digits, then lowercase letters,
# then uppercase letters. The position of a character is its class index.
code_char_set = list(
    '0123456789'
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
)


def random_code_text(code_size):
    """Return a list of ``code_size`` characters picked at random from code_char_set.

    Characters are drawn independently with random.choice, so repeats are
    possible.
    """
    return [random.choice(code_char_set) for _ in range(code_size)]
# Lookup tables between a character and its position in code_char_set.
code_char_2number_dict = {ch: index for index, ch in enumerate(code_char_set)}
code_number_2_char_dict = dict(enumerate(code_char_set))
# print(code_number_2_char_dict)
def generate_code_image(code_size):
    """Render a random captcha and return its text together with its pixels.

    Args:
        code_size: Number of characters in the captcha text.

    Returns:
        A ``(code_text, code_image)`` pair: the captcha string and the rendered
        image converted to a numpy array.
    """
    generator = ImageCaptcha()
    code_text = ''.join(random_code_text(code_size))
    # generate() hands back an object that PIL can open directly; presumably an
    # in-memory image stream -- verify against the captcha library.
    rendered = generator.generate(code_text)
    pixels = np.array(Image.open(rendered))
    return code_text, pixels
def train_code_cnn(mode_path):
    """Train the captcha CNN on randomly generated images and save a checkpoint.

    Every step generates a fresh batch of captchas, converts it to grayscale,
    resizes it to 60x160 and runs one Adam update. Every 10 steps another
    random batch is generated for evaluation; once both the training accuracy
    and the evaluation accuracy exceed 0.7 the model is saved and training
    stops.

    Args:
        mode_path: Checkpoint path prefix handed to ``tf.train.Saver.save``.
            Its parent directory is created if it does not exist.
    """
    import os

    in_image_height = 60
    in_image_weight = 160
    code_size = 4
    batch_size = 64

    x = tf.placeholder(tf.float32, shape=[None, in_image_height, in_image_weight, 1], name='x')
    y = tf.placeholder(tf.float32, shape=[None, code_size], name='y')
    network = code_cnn(x, y)
    # NOTE(review): code_cnn returns softmax activations, while
    # softmax_cross_entropy_with_logits expects unscaled logits, and y carries
    # character indices rather than a probability distribution. The loss is
    # left as written; the modelling should be confirmed.
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=network, labels=y))
    train = tf.train.AdamOptimizer(learning_rate=0.01).minimize(cost)
    # A sample counts as correct only when all code_size positions match.
    # NOTE(review): this compares float labels with softmax outputs for exact
    # equality, which looks unlikely to ever hold -- confirm the metric.
    accuracy = tf.reduce_mean(
        tf.cast(
            tf.equal(
                tf.reduce_sum(tf.cast(tf.equal(y, network), tf.int32), axis=1),
                code_size),
            tf.float32))

    # Build the preprocessing ops once. Creating them inside the training loop
    # would add new nodes to the graph on every step.
    # Assumes the generated images are uint8 RGB arrays -- TODO confirm.
    raw_images = tf.placeholder(tf.uint8, shape=[None, None, None, 3], name='raw_images')
    gray_images = tf.image.rgb_to_grayscale(raw_images)
    prepared_images = tf.image.resize_images(
        gray_images,
        size=(in_image_height, in_image_weight),
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    saver = tf.train.Saver()

    # Create the checkpoint directory up front so a missing folder does not
    # surface only after training has already reached the save condition.
    save_dir = os.path.dirname(mode_path)
    if save_dir and not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    with tf.Session(config=tf.ConfigProto(device_count={'cpu': 0})) as sess:
        sess.run(tf.global_variables_initializer())
        step = 0
        while True:
            batch_x, batch_y = random_next_batch(batch_size=batch_size, code_size=code_size)
            batch_x = sess.run(prepared_images, feed_dict={raw_images: batch_x})
            # Fetch accuracy together with the update so the three values
            # unpacked below actually exist.
            _, cost_, accuracy_ = sess.run(
                [train, cost, accuracy], feed_dict={x: batch_x, y: batch_y})
            print('step:{},cost:{},accuracy:{}'.format(step, cost_, accuracy_))
            if step % 10 == 0:
                test_batch_x, test_batch_y = random_next_batch(batch_size, code_size=code_size)
                test_batch_x = sess.run(prepared_images, feed_dict={raw_images: test_batch_x})
                acc = sess.run(accuracy, feed_dict={x: test_batch_x, y: test_batch_y})
                print('测试集准确率：{}'.format(acc))
                if acc > 0.7 and accuracy_ > 0.7:
                    saver.save(sess, mode_path, global_step=step)
                    break
            step += 1
if __name__ == '__main__':
    # Script entry point: train the model and write checkpoints under ./savepath.
    checkpoint_prefix = './savepath/capcha.model'
    train_code_cnn(checkpoint_prefix)