文章目录
验证码识别模型数据资料
资料百度云地址
提取码:mbhf
模型搭建过程代码
1. 数据加载及可视化
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
#-*- coding: utf-8 -*-
# Read the spreadsheet that lists every captcha image together with its text.
data = pd.read_excel("verifyCode.xls")
# Preview the first rows to confirm the sheet was parsed as expected.
data.head()
| | 序号 | 图片名 | 识别结果 |
---|---|---|---|
0 | 1 | .\img\1.jpg | ojrq |
1 | 2 | .\img\2.jpg | neh3 |
2 | 3 | .\img\3.jpg | 6bhh |
3 | 4 | .\img\4.jpg | hxfn |
4 | 5 | .\img\5.jpg | bfof |
# Pull out the two columns of interest: the image-path column becomes X
# and the recognised-text column becomes y.
X, y = data["图片名"], data["识别结果"]
# Peek at the first few image paths.
X.head()
0 .\img\1.jpg
1 .\img\2.jpg
2 .\img\3.jpg
3 .\img\4.jpg
4 .\img\5.jpg
Name: 图片名, dtype: object
# Peek at the first few labels while y is still a pandas Series.
y.head()
# Convert both Series into plain NumPy arrays and report their shapes.
X, y = np.array(X), np.array(y)
print(X.shape, y.shape)
(500,) (500,)
#visualize the original data and split data
from PIL import Image
from keras.preprocessing.image import load_img,img_to_array
fig1 = plt.figure(figsize=(3, 2))
# Open one sample captcha and derive its single-channel ('L' mode) version.
img = Image.open(X[60])
img2 = img.convert('L')
# Each box is a 4-tuple (left, upper, right, lower). Four adjacent windows,
# 15 px wide and 20 px tall, cut the captcha into one crop per character.
box1, box2, box3, box4 = [(left, 0, left + 15, 20) for left in (0, 15, 30, 45)]
# NOTE: the crops are taken from the original image, not the 'L' version.
img_crop1, img_crop2, img_crop3, img_crop4 = [
    img.crop(box) for box in (box1, box2, box3, box4)
]
# 2x3 grid: original image, 'L' image, then the four character crops.
panels = (img, img2, img_crop1, img_crop2, img_crop3, img_crop4)
for slot, panel in enumerate(panels, start=231):
    plt.subplot(slot)
    plt.imshow(panel)
# Pixel array of the 'L' image, reshaped to a 2-D (20, 60) array.
img_array = img_to_array(img2).reshape(20, 60)
print(img_array.shape)
(20, 60)
2. 构建数据字典
#transform the label
import string
from keras.utils import to_categorical
# Build the character -> class-index lookup used to encode labels:
# digits '0'-'9' take indices 0-9, lowercase 'a'-'z' take indices 10-35.
letters_list = list(map(chr, range(ord('a'), ord('z') + 1)))
label_dict_digits = dict((str(digit), digit) for digit in range(10))
label_dict_letters = dict(zip(letters_list, range(10, 10 + len(letters_list))))
# Combine the two mappings (digits first, then letters) into one table.
label_dict = {**label_dict_digits, **label_dict_letters}
print(label_dict)
label_dict: {'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, 'a': 10, 'b': 11, 'c': 12, 'd': 13, 'e': 14, 'f': 15, 'g': 16, 'h': 17, 'i': 18, 'j': 19, 'k': 20, 'l': 21, 'm': 22, 'n': 23, 'o': 24, 'p': 25, 'q': 26, 'r': 27, 's': 28, 't': 29, 'u': 30, 'v': 31, 'w': 32, 'x': 33, 'y': 34, 'z': 35}
3. 分割验证码
def array_to_letter(array):
    """Return the index of the first entry of ``array`` that equals 1.

    For a 1-D one-hot vector this is the encoded class index. Despite the
    function's name, the result is an integer position, not a character.

    Args:
        array: NumPy array to search; compared element-wise against 1.

    Returns:
        The first index along axis 0 at which an element equals 1.

    Raises:
        IndexError: If no element of ``array`` equals 1.
    """
    # np.where(cond) yields one index array per axis; [0] selects axis 0
    # and the trailing [0] picks the first match.
    return np.where(array == 1)[0][0]
#generate new X and y
new_split_X = np.zeros((4*X.shape[0],300))
new_split_y = np.zeros((4*y.shape[0],