百度飞桨—— 车牌识别学习与修改_飞桨车牌识别-优快云博客

本文链接：https://blog.youkuaiyun.com/weixin_43893363/article/details/113565397

本文介绍了一个基于CNN的车牌识别系统实现过程，涵盖数据集准备、模型构建、训练、评估及应用等关键步骤。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

1. 数据集描述

本数据集共约3.2万张图片，划分为65个类别，分别为车牌中0到9的10个数字、A到Z中的24个英文字母（车牌不使用I和O）以及31个省份简称。所有图片均为分辨率20*20的灰度图像。数据集中的10%作为测试集，90%作为训练集。

2. 技术路线

      采用计算机视觉技术CNN实现车牌识别的流程主要包括数据准备，定义模型，训练模型，模型评估和使用模型5个步骤。路线图如下图1所示。
      在数据准备部分，完成了各种参数的配置及初始化。
      在定义模型部分，完成了对CNN模型的定义。
      在训练模型部分，完成了模型在训练集上的训练；这里我取数据集中的10%作为测试集，90%作为训练集。
      在模型评估部分，完成了对测试集的平均准确率进行评估。
      在使用模型部分，完成了包括对车牌图像进行预处理，对标签进行转换及使用模型进行预测，其中对车牌图像进行预处理包括对图像进行灰度处理、分割出车牌中的每一个字符，填充后保存。
在这里插入图片描述

3. 模型结构

模型采用CNN网络结构，包含3个卷积层、2个池化层、多维输入一维化和2个线性变换。具体结构如图2所示。
在这里插入图片描述

4 .训练集上的表现

在训练集上迭代了14轮，训练过程中记录训练的精度和损失并输出显示，最后绘制训练的精度和损失曲线。随着迭代次数的增加，训练精度在波动中逐渐趋于1并保持稳定，训练损失在波动中逐渐趋于0。
在这里插入图片描述

5 .测试集上的表现

训练完成后，在测试集上进行测试，输出损失和精度的平均值。可以看到平均损失趋近于0，但比训练损失高1~2个数量级；平均精度趋近于1。测试结果如图6所示。
在这里插入图片描述

6. 主要问题解决过程

为了读取中文路径，改用cv2.imdecode()函数，发现还是报错。研究发现cv2.imdecode()函数里面有一个指定图像处理方式的参数（例如转换为灰度图），跟原来用cv2.imread()后又使用cv2.cvtColor()函数来处理图像的做法冲突了，将cv2.cvtColor()函数删除后问题就解决了。
注：我将读取压缩包代码删除，将数据集直接解压读取

7.运行结果

在这里插入图片描述

8.代码

#导入需要的包
import os     #导入标准库os
import random #random() 方法返回随机生成的一个实数，它在[0,1)范围内
import json   #编码和解码 JSON 对象
import cv2    #图片操作
import numpy as np #矩阵操作
from PIL import Image #图片处理模块
import paddle  #百度深度学习平台PaddlePaddle框架
import paddle.fluid as fluid#做图像分类工作
from paddle.fluid.dygraph import Linear,Conv2D,Pool2D # Linear线性变换层 Conv2D卷积二维层 Pool2D返回池化结果
import matplotlib.pyplot as plt #绘图

1、数据准备

'''
参数配置
'''
# Global configuration shared by the data-preparation, training and
# inference steps of this script.
train_parameters = {
    "input_size": [1, 20, 20],                           # shape of one input image
    "class_dim": -1,                                     # number of classes; placeholder, overwritten by get_data_list()
    "target_path":"./dataset",        # directory of the extracted dataset (one sub-folder per class)
    "train_list_path": "./work/train_data.txt",              # path of the training list file
    "eval_list_path": "./work/val_data.txt",                  # path of the evaluation list file
    "label_dict":{},                                    # label id (as str) -> class folder name; filled by get_data_list()
    "readme_path": "./work/readme.json",   # path of the dataset summary JSON
    "num_epochs":14,                                    # number of training epochs
    "train_batch_size":20,                             # batch size
    "learning_strategy": {                              # optimizer-related settings
        "lr": 0.01                                     # learning rate
    }
}

def get_data_list(target_path,train_list_path,eval_list_path):
    '''
    Generate the train/eval list files and the dataset summary JSON.

    Every sub-directory of ``target_path`` is treated as one class. Class
    directories and the images inside them are processed in sorted order,
    so the label id assigned to a class and the train/eval split are
    reproducible across runs and machines. Within a class, the images at
    index 0, 10, 20, ... go to the eval list and all others to the train
    list. Each list line has the form "<image path><TAB><label id>".
    Both lists are shuffled before being written.

    Side effects on the module-level ``train_parameters``:
      * ``label_dict[str(label)]`` is set to the class directory name
      * ``class_dim`` is set to the number of classes
    The summary JSON is written to ``train_parameters['readme_path']``.

    Args:
        target_path: root directory of the dataset.
        train_list_path: output path of the training list (overwritten).
        eval_list_path: output path of the evaluation list (overwritten).
    '''
    data_list_path = target_path
    ignored = ('__MACOSX', '.DS_Store')

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # class -> label mapping stable, which matters because a saved model
    # is only meaningful together with that mapping. Stray files in the
    # dataset root are skipped instead of crashing os.listdir() below.
    class_dirs = sorted(
        name for name in os.listdir(data_list_path)
        if name not in ignored
        and os.path.isdir(os.path.join(data_list_path, name))
    )

    class_detail = []      # per-class summary written to the readme JSON
    trainer_list = []      # lines of the training list
    eval_list = []         # lines of the evaluation list
    all_class_images = 0   # total number of images over all classes

    for class_label, class_dir in enumerate(class_dirs):
        path = os.path.join(data_list_path, class_dir).replace('\\', '/')
        img_names = sorted(n for n in os.listdir(path) if n != '.DS_Store')

        eval_sum = 0       # number of eval images of this class
        trainer_sum = 0    # number of training images of this class
        for class_sum, img_name in enumerate(img_names):
            name_path = os.path.join(path, img_name).replace('\\', '/')
            line = name_path + "\t%d" % class_label + "\n"
            if class_sum % 10 == 0:
                # every 10th image of a class is held out for evaluation
                eval_sum += 1
                eval_list.append(line)
            else:
                trainer_sum += 1
                trainer_list.append(line)
        all_class_images += len(img_names)

        class_detail.append({
            'class_name': class_dir,
            'class_label': class_label,
            'class_eval_images': eval_sum,
            'class_trainer_images': trainer_sum,
        })
        train_parameters['label_dict'][str(class_label)] = class_dir

    train_parameters['class_dim'] = len(class_dirs)
    print(train_parameters)

    # Write mode (not append): calling this function again replaces the
    # lists instead of duplicating every entry.
    random.shuffle(eval_list)
    with open(eval_list_path, 'w') as f:
        f.writelines(eval_list)
    random.shuffle(trainer_list)
    with open(train_list_path, 'w') as f2:
        f2.writelines(trainer_list)

    # Dataset summary
    readjson = {
        'all_class_name': data_list_path,      # dataset root directory
        'all_class_images': all_class_images,
        'class_detail': class_detail,
    }
    jsons = json.dumps(readjson, sort_keys=True, indent=4, separators=(',', ': '))
    with open(train_parameters['readme_path'],'w') as f:
        f.write(jsons)
    print ('生成数据列表完成！')
#生成数据列表   
#get_data_list(target_path,train_list_path,eval_list_path)

def data_reader(file_list):
    '''
    Build a sample reader over a list file.

    Each non-blank line of ``file_list`` has the form
    "<image path><TAB><label>". The returned callable creates a generator
    that yields ``(image, label)`` pairs: ``image`` is the image decoded
    as grayscale, converted to float32 and scaled to [0, 1]; ``label`` is
    an int.

    Args:
        file_list: path of the list file.

    Returns:
        A zero-argument function returning the sample generator.

    Raises (while iterating):
        ValueError: if an image file cannot be decoded.
    '''
    def reader():
        # Read the whole list first so the file is closed before samples
        # are handed out; blank lines are ignored.
        with open(file_list, 'r') as f:
            lines = [line.strip() for line in f if line.strip()]
        for line in lines:
            # The label is the last tab-separated field.
            img_path, lab = line.rsplit('\t', 1)
            # np.fromfile + imdecode instead of imread so that paths with
            # non-ASCII (e.g. Chinese) characters can be read; flag 0
            # decodes to a grayscale image.
            img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), 0)
            if img is None:
                # Fail here with the offending path instead of yielding a
                # NaN scalar that breaks later at reshape().
                raise ValueError('cannot decode image: ' + img_path)
            img = np.array(img).astype('float32') / 255.0
            yield img, int(lab)
    return reader

# ---- Parameter initialisation ----
target_path = train_parameters['target_path']
train_list_path = train_parameters['train_list_path']
eval_list_path = train_parameters['eval_list_path']
batch_size = train_parameters['train_batch_size']

# Make sure both list files exist and are empty before the data lists are
# generated; opening a file in 'w' mode already truncates it.
with open(train_list_path, 'w'), open(eval_list_path, 'w'):
    pass

# Generate the data lists
get_data_list(target_path, train_list_path, eval_list_path)

# Wrap the per-sample readers into batch readers: paddle.batch groups the
# samples of the given reader into batches of `batch_size`. With
# drop_last=True a final batch smaller than `batch_size` is discarded
# (the default, False, would keep it).
train_reader = paddle.batch(
    data_reader(train_list_path),
    batch_size=batch_size,
    drop_last=True,
)
eval_reader = paddle.batch(
    data_reader(eval_list_path),
    batch_size=batch_size,
    drop_last=True,
)

Batch = 0            # batch counter, advanced by the training loop at each logging point
Batchs = []          # batch counter values recorded at the logging points (x axis)
all_train_accs = []  # training accuracy recorded at the logging points


def draw_train_acc(Batchs, train_accs):
    """Plot training accuracy (y) against the batch counter (x) and show it."""
    plt.title("training accs", fontsize=24)
    plt.xlabel("batch", fontsize=14)
    plt.ylabel("acc", fontsize=14)
    # green line, labelled so that it appears in the legend
    plt.plot(Batchs, train_accs, color='green', label='training accs')
    plt.legend()
    plt.grid()
    plt.show()

all_train_loss = []  # training loss recorded at the logging points


def draw_train_loss(Batchs, train_loss):
    """Plot training loss (y) against the batch counter (x) and show it."""
    plt.title("training loss", fontsize=24)
    plt.xlabel("batch", fontsize=14)
    plt.ylabel("loss", fontsize=14)
    # red line, labelled so that it appears in the legend
    plt.plot(Batchs, train_loss, color='red', label='training loss')
    plt.legend()
    plt.grid()
    plt.show()

2、定义模型

#定义CNN网络
class MyCNN(fluid.dygraph.Layer):
    '''
    Small CNN that classifies one 1x20x20 grayscale character image.

    Shapes for an input of shape [N, 1, 20, 20]:
        c1  Conv2D   1 -> 6,    5x5 kernel,   stride 1  -> [N, 6, 16, 16]
        s2  max pool 2x2,                      stride 1  -> [N, 6, 15, 15]
        c3  Conv2D   6 -> 16,   5x5 kernel,   stride 1  -> [N, 16, 11, 11]
        s4  max pool 2x2,                      stride 1  -> [N, 16, 10, 10]
        c5  Conv2D   16 -> 120, 10x10 kernel, stride 1  -> [N, 120, 1, 1]
        f6  Linear   120 -> 84, relu                     -> [N, 84]
        f7  Linear   84 -> num_classes, softmax          -> [N, num_classes]

    Args:
        num_classes: width of the output layer. Defaults to 65, the value
            that used to be hard-coded, so ``MyCNN()`` and previously
            saved parameters keep working.
    '''
    def __init__(self, num_classes=65):
        super(MyCNN, self).__init__()
        # Conv2D arguments: input channels, number of filters, kernel size, stride
        self.c1 = Conv2D(1, 6, 5, 1)
        # Pool2D arguments: window size, pooling type, stride
        self.s2 = Pool2D(pool_size=2, pool_type='max', pool_stride=1)
        self.c3 = Conv2D(6, 16, 5, 1)
        self.s4 = Pool2D(pool_size=2, pool_type='max', pool_stride=1)
        self.c5 = Conv2D(16, 120, 10, 1)
        # Linear arguments: input units, output units, activation
        self.f6 = Linear(120, 84, act='relu')
        self.f7 = Linear(84, num_classes, act='softmax')

    def forward(self, input):
        '''Return the softmax class probabilities, shape [N, num_classes].'''
        x = self.c1(input)   # [N, 6, 16, 16]   16 = 20 - 5 + 1
        x = self.s2(x)       # [N, 6, 15, 15]   15 = 16 - 2 + 1
        x = self.c3(x)       # [N, 16, 11, 11]  11 = 15 - 5 + 1
        x = self.s4(x)       # [N, 16, 10, 10]  10 = 11 - 2 + 1
        x = self.c5(x)       # [N, 120, 1, 1]    1 = 10 - 10 + 1
        # flatten the 120 feature maps of size 1x1 into a vector
        x = fluid.layers.reshape(x, shape=[-1, 120])
        x = self.f6(x)       # [N, 84]
        y = self.f7(x)       # [N, num_classes]
        return y

3、训练模型

# Train MyCNN in dygraph mode with SGD, log loss/accuracy every 50 batches,
# save the parameters under the prefix 'MyCNN', then plot the logged curves.
with fluid.dygraph.guard():
    model=MyCNN() # instantiate the network
    model.train() # switch to training mode
    opt=fluid.optimizer.SGDOptimizer(learning_rate=train_parameters['learning_strategy']['lr'], parameter_list=model.parameters())# SGD optimizer; the learning rate is read from train_parameters['learning_strategy']['lr']
    epochs_num=train_parameters['num_epochs'] # number of epochs
    
    for pass_num in range(epochs_num):
        for batch_id,data in enumerate(train_reader()):
            # stack the samples of the batch into a float32 array of shape [N, 1, 20, 20]
            images=np.array([x[0].reshape(1,20,20) for x in data],np.float32)
            labels = np.array([x[1] for x in data]).astype('int64') # labels as int64
            labels = labels[:, np.newaxis] # shape [N] -> [N, 1]
            image=fluid.dygraph.to_variable(images) # numpy array -> Variable
            label=fluid.dygraph.to_variable(labels)

            predict=model(image) # forward pass
            
            loss=fluid.layers.cross_entropy(predict,label)
            avg_loss=fluid.layers.mean(loss)# mean loss of the batch
            
            acc=fluid.layers.accuracy(predict,label)# accuracy of the batch
            
            # every 50 batches (except batch 0): record and print loss/accuracy;
            # Batch keeps counting across epochs and is the x value of the curves
            if batch_id!=0 and batch_id%50==0:
                Batch = Batch+50 
                Batchs.append(Batch)
                all_train_loss.append(avg_loss.numpy()[0])
                all_train_accs.append(acc.numpy()[0])
                
                print("train_pass:{},batch_id:{},train_loss:{},train_acc:{}".format(pass_num,batch_id,avg_loss.numpy(),acc.numpy()))
            
            avg_loss.backward()       # back-propagate
            opt.minimize(avg_loss)    # update the parameters
            model.clear_gradients()   # reset the gradients for the next batch
    fluid.save_dygraph(model.state_dict(),'MyCNN')# save the trained parameters

draw_train_acc(Batchs,all_train_accs) # training accuracy curve
draw_train_loss(Batchs,all_train_loss) # training loss curve

4、模型评估

# Model evaluation: load the saved parameters and report the mean loss and
# mean accuracy over all batches of the eval reader.
with fluid.dygraph.guard():
    accs = []    # accuracy of each batch
    losss = []   # mean cross-entropy loss of each batch
    model_dict, _ = fluid.load_dygraph('MyCNN')
    model = MyCNN()
    model.load_dict(model_dict)  # load the trained parameters
    model.eval()                 # switch to evaluation mode
    for batch_id, data in enumerate(eval_reader()):
        # same batch layout as in training: images [N, 1, 20, 20], labels [N, 1]
        images = np.array([x[0].reshape(1,20,20) for x in data], np.float32)
        labels = np.array([x[1] for x in data]).astype('int64')
        labels = labels[:, np.newaxis]
        image = fluid.dygraph.to_variable(images)
        label = fluid.dygraph.to_variable(labels)
        predict = model(image)

        acc = fluid.layers.accuracy(predict, label)
        accs.append(acc.numpy()[0])

        # cross_entropy returns one loss value per sample; average over the
        # whole batch instead of keeping only the first sample's loss.
        loss = fluid.layers.cross_entropy(predict, label)
        losss.append(np.mean(loss.numpy()))

    # Compute the averages once, after the loop; NaN if nothing was evaluated
    # (e.g. fewer eval samples than one batch).
    avg_acc = np.mean(accs) if accs else float('nan')
    avg_loss = np.mean(losss) if losss else float('nan')
    print("平均损失：",avg_loss)
    print("平均精度：",avg_acc)

5、使用模型
5.1对车牌图像进行预处理

# Preprocess the plate image: binarise it, split it into segments by column
# projection, and save the character segments as 20x20 images.
gray_plate = cv2.imdecode(np.fromfile('./work/车牌3.png', dtype=np.uint8), 0)
if gray_plate is None:
    raise ValueError('cannot decode plate image: ./work/车牌3.png')
# ret: the threshold used; binary_plate: image with pixels > 175 set to 255, others to 0
ret, binary_plate = cv2.threshold(gray_plate, 175, 255, cv2.THRESH_BINARY)

# Column projection: number of white (non-zero) pixels in each column.
result = np.count_nonzero(binary_plate, axis=0)

# Locate the segments: maximal runs of columns containing white pixels.
# character_dict[k] = [first column, last column] of the k-th segment.
character_dict = {}
num = 0
i = 0
while i < len(result):
    if result[i] == 0:
        i += 1
    else:
        index = i + 1
        # Bounds check first: a segment touching the right border of the
        # image is not followed by an empty column.
        while index < len(result) and result[index] != 0:
            index += 1
        character_dict[num] = [i, index-1]
        num += 1
        i = index

if len(character_dict) < 8:
    raise ValueError('expected at least 8 segments in the plate image, found %d' % len(character_dict))

# Pad every character segment and store it.
# Segment 2 is skipped -- presumably the separator dot between the 2nd and
# 3rd character of the plate; TODO confirm for other plate images.
characters = []
for i in range(8):
    if i==2:
        continue
    start, end = character_dict[i]
    padding = (170 - (end - start)) / 2
    # Segments wider than 170 columns get no padding instead of crashing np.pad.
    pad = max(int(padding), 0)
    # Zero-pad left and right only, to a width of about 170 columns; the
    # height is left unchanged (presumably the plate image is about 170
    # pixels high, so the result is roughly square -- TODO confirm).
    ndarray = np.pad(binary_plate[:, start:end], ((0,0), (pad, pad)), 'constant', constant_values=(0,0))
    ndarray = cv2.resize(ndarray, (20,20))
    cv2.imwrite('./work/' + str(i) + '.png', ndarray)
    characters.append(ndarray)
    
def load_image(path):
    """Load the image at `path` as grayscale for inference.

    The image is converted to float32, given a new leading axis and
    scaled by 1/255.
    """
    gray = paddle.dataset.image.load_image(file=path, is_color=False)
    return gray.astype('float32')[np.newaxis, ] / 255.0

5.2 对标签进行转换

# Convert the labels: map every label id to the character shown to the user.
print('转换前Label：\n')
print(train_parameters['label_dict'])
print('\n转换后：\n')

# Class folder name -> display character. Letters and digits map to
# themselves; province folder names map to the province abbreviation.
match = {letter: letter for letter in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'}
match.update({
    'yun':'云', 'cuan':'川', 'hei':'黑', 'zhe':'浙', 'ning':'宁', 'jin':'津',
    'gan':'赣', 'hu':'沪', 'liao':'辽', 'jl':'吉', 'qing':'青', 'zang':'藏',
    'e1':'鄂', 'meng':'蒙', 'gan1':'甘', 'qiong':'琼', 'shan':'陕', 'min':'闽',
    'su':'苏', 'xin':'新', 'wan':'皖', 'jing':'京', 'xiang':'湘', 'gui':'贵',
    'yu1':'渝', 'yu':'豫', 'ji':'冀', 'yue':'粤', 'gui1':'桂', 'sx':'晋', 'lu':'鲁',
})
match.update({digit: digit for digit in '0123456789'})

# LABEL: position of the class in label_dict (as str) -> display character
LABEL = {
    str(position): match[folder_name]
    for position, folder_name in enumerate(train_parameters['label_dict'].values())
}
print(LABEL)

5.3 使用模型进行预测

# Inference in dygraph mode: classify each saved character image and print
# the recognised plate number.
with fluid.dygraph.guard():
    model = MyCNN()                              # instantiate the network
    model_dict, _ = fluid.load_dygraph('MyCNN')
    model.load_dict(model_dict)                  # load the trained parameters
    model.eval()                                 # evaluation mode
    lab = []                                     # predicted class index per character
    for i in range(8):
        if i != 2:                               # no image was saved for segment 2
            # a batch holding the single character image
            batch = np.array([load_image('work/' + str(i) + '.png')])
            scores = model(fluid.dygraph.to_variable(batch))
            lab.append(np.argmax(scores.numpy()))

display(Image.open('work/车牌3.png'))
print("识别出的车牌号为:")
for class_index in lab:
    print(LABEL[str(class_index)], end='')