学习深度学习时,我做的第一个小项目就是这个——验证码识别。前前后后花了一个多月的时间,水平还是很菜,下面是我的成果:大部分代码是自己写的,部分借鉴了别人的。然后分享一点我的经验:训练的时候要有耐心,只要损失值在变小,或者在波动(但准确率却没多大变化),就说明大致的方向没有错,坚持等下去。我这个是在训练到3000多次的时候,损失值发生了突变,才走上正轨的。
训练代码:
import numpy as np
import tensorflow as tf
from captcha.image import ImageCaptcha
import matplotlib.pyplot as plt
import os
import random
from PIL import Image
# Emits the literal 3 on stdout when the module is loaded.
print(3)

# Character classes used to build captchas; each list holds
# single-character strings in ascending order.
number = list('0123456789')
alphabet = list('abcdefghijklmnopqrstuvwxyz')
ALPHABET = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
def random_captcha_text(char_set=number + alphabet + ALPHABET, captcha_size=4):
    """Draw random characters for a captcha.

    Args:
        char_set: Sequence of candidate characters. Defaults to the digits
            followed by the lower- and upper-case ASCII letters.
        captcha_size: How many characters to draw.

    Returns:
        A list of ``captcha_size`` characters picked independently with
        ``random.choice`` (repeats are possible), in draw order.
    """
    return [random.choice(char_set) for _ in range(captcha_size)]
def gen_captcha_text_and_image():
    """Create one random captcha and return its text with the rendered pixels.

    No shape filtering happens here; a caller that needs a fixed image
    shape has to check the returned array itself.

    Returns:
        A ``(text, image)`` tuple where ``text`` is the captcha string and
        ``image`` is the rendered picture converted to a NumPy array.
    """
    generator = ImageCaptcha()
    text = ''.join(random_captcha_text())
    # generate() hands back a file-like object that PIL can open directly.
    rendered = generator.generate(text)
    pixels = np.array(Image.open(rendered))
    return text, pixels
# Image dimensions, in pixels, used to size the network input.
IMAGE_HEIGHT = 60
IMAGE_WIDTH = 160

# Generate one sample captcha at load time; the length of its text
# defines how many characters every label is expected to hold.
text, image = gen_captcha_text_and_image()
MAX_CAPTCHA = len(text)

# Label alphabet: every captcha character class plus the '_' symbol.
char_set = number + alphabet + ALPHABET + ['_']
CHAR_SET_LEN = len(char_set)

print(CHAR_SET_LEN)
print("验证码文本最长字符数",MAX_CAPTCHA)
def convert2gray(img):
    """Reduce an image to a single channel by averaging over its last axis.

    Args:
        img: Array-like image data (ndarray, nested list, PIL image, ...).
            Inputs with more than two dimensions are averaged over the
            last axis; this is a plain mean, not a luminance-weighted one.

    Returns:
        For inputs with more than two dimensions, a float array with the
        last axis removed. Inputs with at most two dimensions are returned
        as they are (an ndarray is passed through without copying).
    """
    # asanyarray leaves ndarrays (and subclasses) untouched and lets
    # other array-likes through instead of failing on a missing `.shape`.
    img = np.asanyarray(img)
    if img.ndim <= 2:
        return img
    return np.mean(img, -1)
# Top-level call with no arguments: under Python 3 this writes an empty
# line to stdout at load time (presumably debugging residue -- confirm
# before removing).
print ()
def text2vec(text):
    """Encode captcha text as a flat one-hot vector.

    Each character position owns a slice of ``CHAR_SET_LEN`` entries. Within
    a slice the class index is 0-9 for digits, 10-35 for ``A``-``Z``,
    36-61 for ``a``-``z`` and 62 for ``'_'``.

    Args:
        text: Captcha string of at most ``MAX_CAPTCHA`` characters.

    Returns:
        A 1-D float array of length ``MAX_CAPTCHA * CHAR_SET_LEN`` with a
        single 1 per encoded character; slices for missing trailing
        characters stay all-zero.

    Raises:
        ValueError: If ``text`` is longer than ``MAX_CAPTCHA`` or contains a
            character outside ``[0-9A-Za-z_]``.
    """
    if len(text) > MAX_CAPTCHA:
        raise ValueError('验证码最长4个字符')

    def char2pos(c):
        """Return the class index of one character, or raise ValueError."""
        if c == '_':
            return 62
        # Explicit range checks: arithmetic on ord() alone would silently
        # map punctuation between the ranges onto valid class indices.
        if '0' <= c <= '9':
            return ord(c) - ord('0')
        if 'A' <= c <= 'Z':
            return ord(c) - ord('A') + 10
        if 'a' <= c <= 'z':
            return ord(c) - ord('a') + 36
        raise ValueError('No Map')

    vector = np.zeros(MAX_CAPTCHA * CHAR_SET_LEN)
    for position, char in enumerate(text):
        vector[position * CHAR_SET_LEN + char2pos(char)] = 1
    return vector
def vec2text(vec):
    """Decode a one-hot label vector into its characters.

    Every non-zero entry of ``vec`` yields one character, chosen by the
    entry's index modulo ``CHAR_SET_LEN``: 0-9 are digits, 10-35 are
    ``A``-``Z``, 36-61 are ``a``-``z`` and 62 is ``'_'``.

    Args:
        vec: 1-D NumPy array laid out as consecutive ``CHAR_SET_LEN``-wide
            slices, one per character position.

    Returns:
        The decoded characters joined by single spaces.

    Raises:
        ValueError: If a class index above 62 is encountered.
    """
    decoded = []
    for flat_idx in vec.nonzero()[0]:
        char_idx = flat_idx % CHAR_SET_LEN
        if char_idx < 10:
            char_code = char_idx + ord('0')
        elif char_idx < 36:
            char_code = char_idx - 10 + ord('A')
        elif char_idx < 62:
            char_code = char_idx - 36 + ord('a')
        elif char_idx == 62:
            char_code = ord('_')
        else:
            raise ValueError('error')
        decoded.append(chr(char_code))
    # NOTE(review): the space separator means the result is not the exact
    # string that was encoded ("ab12" comes back as "a b 1 2"). Kept as-is
    # because callers may rely on it -- confirm whether "" was intended.
    return " ".join(decoded)
def get_next_batch(batch_size=128):
    """Generate a fresh batch of captcha images and their one-hot labels.

    Args:
        batch_size: Number of samples to generate.

    Returns:
        A ``(batch_x, batch_y)`` tuple. ``batch_x`` has shape
        ``(batch_size, IMAGE_HEIGHT * IMAGE_WIDTH)`` and holds flattened
        grayscale images divided by 255; ``batch_y`` has shape
        ``(batch_size, MAX_CAPTCHA * CHAR_SET_LEN)`` and holds the labels
        produced by ``text2vec``.
    """
    batch_x = np.zeros([batch_size, IMAGE_HEIGHT * IMAGE_WIDTH])
    batch_y = np.zeros([batch_size, MAX_CAPTCHA * CHAR_SET_LEN])
    # Derived from the module constants so the filter always agrees with
    # the buffer width allocated above.
    expected_shape = (IMAGE_HEIGHT, IMAGE_WIDTH, 3)

    def wrap_gen_captcha_text_and_image():
        """Keep generating until the image has the expected 3-channel shape."""
        while True:
            text, image = gen_captcha_text_and_image()
            if image.shape == expected_shape:
                return text, image

    for i in range(batch_size):
        text, image = wrap_gen_captcha_text_and_image()
        gray = convert2gray(image)
        batch_x[i, :] = gray.flatten() / 255
        batch_y[i, :] = text2vec(text)
    return batch_x, batch_y
# Graph inputs, supplied through feed_dict on every session run.
# X: one flattened image per row.
X = tf.placeholder(tf.float32, shape=[None, IMAGE_HEIGHT * IMAGE_WIDTH])
# Y: one flattened one-hot label per row.
Y = tf.placeholder(tf.float32, shape=[None, MAX_CAPTCHA * CHAR_SET_LEN])
# keep_prob: value handed to every tf.nn.dropout call in the network.
keep_prob = tf.placeholder(tf.float32)
def crack_captcha_cnn(w_alpha=0.01, b_alpha=0.1):
    """Build the captcha-recognition CNN on top of the ``X`` placeholder.

    The network is three identical stages (3x3 convolution, ReLU, 2x2 max
    pooling, dropout) with 32, 64 and 64 filters, followed by a 1024-unit
    dense layer with dropout and a linear output layer.

    Args:
        w_alpha: Scale applied to the random-normal initial weights.
        b_alpha: Scale applied to the random-normal initial biases.

    Returns:
        The logits tensor, ``MAX_CAPTCHA * CHAR_SET_LEN`` values per sample.
    """
    x = tf.reshape(X, shape=[-1, IMAGE_HEIGHT, IMAGE_WIDTH, 1])

    def conv_block(inputs, in_channels, out_channels):
        """One conv -> ReLU -> max-pool -> dropout stage."""
        # Weights are created before biases so variables keep the same
        # creation order (and therefore auto-generated names) as before.
        weights = tf.Variable(
            w_alpha * tf.random_normal([3, 3, in_channels, out_channels]))
        biases = tf.Variable(b_alpha * tf.random_normal([out_channels]))
        conv = tf.nn.conv2d(
            inputs, weights, strides=[1, 1, 1, 1], padding='SAME')
        activated = tf.nn.relu(tf.nn.bias_add(conv, biases))
        pooled = tf.nn.max_pool(
            activated, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
            padding='SAME')
        return tf.nn.dropout(pooled, keep_prob)

    conv1 = conv_block(x, 1, 32)
    conv2 = conv_block(conv1, 32, 64)
    conv3 = conv_block(conv2, 64, 64)

    # Size of the flattened feature map, read from the static shape rather
    # than hard-coded: for 60x160 input this is 8 * 20 * 64 = 10240.
    flat_dim = int(np.prod(conv3.get_shape().as_list()[1:]))
    w_d = tf.Variable(w_alpha * tf.random_normal([flat_dim, 1024]))
    b_d = tf.Variable(b_alpha * tf.random_normal([1024]))
    dense = tf.reshape(conv3, [-1, flat_dim])
    dense = tf.nn.relu(tf.add(tf.matmul(dense, w_d), b_d))
    dense = tf.nn.dropout(dense, keep_prob)

    w_out = tf.Variable(
        w_alpha * tf.random_normal([1024, MAX_CAPTCHA * CHAR_SET_LEN]))
    b_out = tf.Variable(
        b_alpha * tf.random_normal([MAX_CAPTCHA * CHAR_SET_LEN]))
    # Raw logits: the sigmoid is applied inside the loss, not here.
    out = tf.add(tf.matmul(dense, w_out), b_out)
    return out
def train_crack_captcha_cnn():
    """Train the captcha CNN until it passes the accuracy threshold.

    Runs Adam (learning rate 0.001) on batches of 64 freshly generated
    captchas with a dropout keep probability of 0.75, printing the loss
    after every step. Every 100 steps the per-character accuracy is
    measured on a new batch of 100 samples with dropout disabled; once it
    exceeds 0.9 a checkpoint is written under ``./model/``, the elapsed
    time in seconds is printed and the function returns.
    """
    import time

    start_time = time.time()
    output = crack_captcha_cnn()
    loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=output, labels=Y))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

    # Accuracy is computed per character position: argmax over the class
    # axis of the prediction against argmax of the one-hot label.
    predict = tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN])
    max_idx_p = tf.argmax(predict, 2)
    max_idx_l = tf.argmax(tf.reshape(Y, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)
    correct_pred = tf.equal(max_idx_p, max_idx_l)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    saver = tf.train.Saver()
    checkpoint_prefix = "./model/crack_captcha.model"
    # Create the checkpoint directory before training starts so a missing
    # or unwritable path fails now instead of after the model has trained.
    model_dir = os.path.dirname(checkpoint_prefix)
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        step = 0
        while True:
            batch_x, batch_y = get_next_batch(64)
            _, loss_ = sess.run(
                [optimizer, loss],
                feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.75})
            print(time.strftime('%Y-%m-%d %H: %M: %S', time.localtime(time.time())), step, loss_)
            if step % 100 == 0:
                batch_x_test, batch_y_test = get_next_batch(100)
                acc = sess.run(
                    accuracy,
                    feed_dict={X: batch_x_test, Y: batch_y_test, keep_prob: 1.})
                print("*********第{}次的准确率为{}*****".format(step, acc))
                if acc > 0.9:
                    saver.save(sess, checkpoint_prefix, global_step=step)
                    print(time.time() - start_time)
                    break
            step += 1
# Runs at module load: builds the graph and trains until the accuracy
# check inside train_crack_captcha_cnn() exceeds 0.9.
train_crack_captcha_cnn()