字符识别top4%代码

本文介绍了一种基于卷积神经网络(CNN)的手写数字识别模型构建过程,该模型利用了MNIST和Kaggle数据集进行训练,通过多层卷积、池化和全连接层实现高精度分类。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

# In[]
from keras.datasets import mnist
import scipy.io as sio
import os
import pandas as pd

from keras.utils import np_utils
from keras.models import Sequential
from keras import backend

from keras.layers import Dense, Dropout,Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Activation

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# In[Load the classic MNIST data: 60000 training / 10000 test samples]
# The four arrays are cached as .mat files in the working directory so that
# later runs can read them back instead of calling mnist.load_data() again.
_mnist_cache_files = (
    'mnist_train_X.mat',
    'mnist_train_y.mat',
    'mnist_test_X.mat',
    'mnist_test_y.mat',
)

# Re-create the cache unless every file is present; checking only one file
# would make loadmat() fail on a partially written cache.
if not all(os.path.exists(path) for path in _mnist_cache_files):
    (mnist_train_X, mnist_train_y), (mnist_test_X, mnist_test_y) = \
        mnist.load_data()
    sio.savemat('mnist_train_X.mat', {'mnist_train_X' : mnist_train_X})
    sio.savemat('mnist_train_y.mat', {'mnist_train_y' : mnist_train_y})
    sio.savemat('mnist_test_X.mat', {'mnist_test_X' : mnist_test_X})
    sio.savemat('mnist_test_y.mat', {'mnist_test_y' : mnist_test_y})
else:
    mnist_train_X = sio.loadmat('mnist_train_X.mat')['mnist_train_X']
    mnist_train_y = sio.loadmat('mnist_train_y.mat')['mnist_train_y']
    mnist_test_X = sio.loadmat('mnist_test_X.mat')['mnist_test_X']
    mnist_test_y = sio.loadmat('mnist_test_y.mat')['mnist_test_y']

# Flatten the labels to 1-D so the later one-hot encoding gets the layout
# the network expects. Labels read back through loadmat() arrive as a
# (1, N) matrix, while labels coming straight from mnist.load_data() are
# already 1-D; reshape(-1) handles both, whereas indexing shape[1] raised
# IndexError on the very first (uncached) run.
mnist_train_y = mnist_train_y.reshape(-1)
mnist_test_y = mnist_test_y.reshape(-1)


# In[Kaggle data: 42000 training / 28000 test samples]
# Read the two competition CSV files from the working directory.
kaggle_train_X = pd.read_csv('train.csv')
kaggle_test_X = pd.read_csv('test.csv')

# Split the target off the training frame: pop() hands back the 'label'
# column and removes it from the frame in place.
kaggle_train_y = kaggle_train_X.pop('label')

# A DataFrame has no reshape attribute, so switch to plain ndarrays here.
kaggle_train_X, kaggle_test_X = (
    np.array(kaggle_train_X),
    np.array(kaggle_test_X),
)

## In[数据总览]
## 绘制计数直方图
#sns.countplot(mnist_train_y)
#plt.xlabel("keras_mnist label")
#plt.show()
## 使用np.unique(return_counts=True)统计各标签的样本数
#unique, count= np.unique(mnist_train_y, return_counts=True)
#print("number of labels of keras_mnist = \n%s " % dict (zip(unique, count) ), "\n" )
#
## 绘制计数直方图
#sns.countplot(kaggle_train_y)
#plt.xlabel("kaggle_mnist label")
#plt.show()
## 使用np.unique(return_counts=True)统计各标签的样本数
#unique, count= np.unique(kaggle_train_y, return_counts=True)
#print("number of labels of kaggle_mnist = \n%s " % dict (zip(unique, count) ), "\n" )

# In[Normalize the data]
# Divide every image array by the same constant.
# NOTE(review): assumes raw pixel values lie in 0..255, so the scaled
# values fall in [0, 1] - confirm against the data files.
PIXEL_SCALE = 255

mnist_train_X, mnist_test_X = (
    mnist_train_X / PIXEL_SCALE,
    mnist_test_X / PIXEL_SCALE,
)
kaggle_train_X, kaggle_test_X = (
    kaggle_train_X / PIXEL_SCALE,
    kaggle_test_X / PIXEL_SCALE,
)

# In[Reshape the data into the layout the CNN takes as input]
# Every sample becomes a 28 x 28 single-channel float32 image; the leading
# axis keeps one entry per sample.
IMAGE_SHAPE = (28, 28, 1)

mnist_train_X = mnist_train_X.reshape(
    (len(mnist_train_X),) + IMAGE_SHAPE).astype(np.float32)
mnist_test_X = mnist_test_X.reshape(
    (len(mnist_test_X),) + IMAGE_SHAPE).astype(np.float32)
kaggle_train_X = kaggle_train_X.reshape(
    (len(kaggle_train_X),) + IMAGE_SHAPE).astype(np.float32)
kaggle_test_X = kaggle_test_X.reshape(
    (len(kaggle_test_X),) + IMAGE_SHAPE).astype(np.float32)

# One-hot encode the integer labels (the Kaggle test set has none).
mnist_train_y = np_utils.to_categorical(mnist_train_y)
mnist_test_y = np_utils.to_categorical(mnist_test_y)
kaggle_train_y = np_utils.to_categorical(kaggle_train_y)

# In[Enlarge the training data]
# Append the Kaggle training samples after the MNIST training samples along
# the sample axis (axis 0, the default), for images and labels alike.
X_train = np.concatenate([mnist_train_X, kaggle_train_X])
y_train = np.concatenate([mnist_train_y, kaggle_train_y])

# In[Build the network]
# Declare the image layout first: the channel axis comes last.
backend.set_image_data_format('channels_last')

# The layers in the order they are stacked; in a Sequential model each
# layer passes its output to the next one without any extra wiring.
cnn_layers = [
    # Two 5x5 convolutions, then 2x2 max-pooling and dropout
    Conv2D(40, kernel_size=5, padding="same", input_shape=(28, 28, 1),
           activation='relu'),
    Conv2D(50, kernel_size=5, padding="valid", activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.2),

    # Two 3x3 convolutions, each followed by 2x2 max-pooling
    Conv2D(70, kernel_size=3, padding="same", activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Conv2D(100, kernel_size=3, padding="valid", activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),

    # Classifier head: 100 hidden units with dropout, then 10 softmax outputs
    Flatten(),
    Dense(100),
    Activation("relu"),
    Dropout(0.2),
    Dense(10),
    Activation("softmax"),
]

model = Sequential()
for layer in cnn_layers:
    model.add(layer)

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# In[Plot the network structure]
# Writing model.png is optional. If the plotting helper or its rendering
# backend is unavailable, report it and carry on so that training still
# runs, instead of aborting the whole script here.
try:
    from keras.utils.vis_utils import plot_model

    plot_model(model, to_file='model.png', show_shapes=True,
               show_layer_names=False)
except (ImportError, OSError) as plot_error:
    print("Skipping model diagram (model.png): %s" % plot_error)

# In[Set the epochs and batch_size used to train the network]
epochs, batch_size = 32, 2048

# Fit on the merged training set; validation_split hands 20% of it to
# Keras for validation.
training_options = {
    'epochs': epochs,
    'batch_size': batch_size,
    'validation_split': 0.2,
}
model.fit(X_train, y_train, **training_options)

# Score the trained model on the MNIST test set and print the result
# (the compiled loss followed by the accuracy metric).
# NOTE(review): verbose=10 is kept exactly as written - confirm it is an
# intended value for evaluate().
scores = model.evaluate(mnist_test_X, mnist_test_y, verbose=10)
print(scores)


# In[Predict the Kaggle test set and write the submission file]
# kaggle_test_X was already scaled and reshaped to (n, 28, 28, 1) float32
# earlier in the script, so it can be fed to the model directly.
res = model.predict(kaggle_test_X)

# Turn each row of class scores into the index of its largest score.
res = np.argmax(res, axis=1)
res = pd.Series(res, name="Label")

# ImageId is 1-based. Its length is taken from the predictions rather than
# hard-coded as 28000, so a test set of another size cannot yield
# misaligned or NaN-padded rows.
image_ids = pd.Series(range(1, len(res) + 1), name="ImageId")
submission = pd.concat([image_ids, res], axis=1)

# The file name records the training settings,
# e.g. LCNN2_32epoch_2048batch_size.csv
submission.to_csv(
    "LCNN2_{}epoch_{}batch_size.csv".format(epochs, batch_size),
    index=False,
)
 

### 使用 EasyOCR 进行车牌字符识别的解决方案

#### 1. 易于使用的 OCR 工具

EasyOCR 是一种高效且功能强大的光学字符识别工具,支持超过 80 种语言[^1]。由于其基于深度学习的技术架构,它能够在复杂的图像环境中保持较高的识别精度。

#### 2. 车牌字符识别流程概述

为了实现车牌字符识别的功能,通常需要结合目标检测技术和 OCR 技术。以下是具体方法:

- **目标检测**: 利用 YOLO 或其他目标检测框架来定位图像中的车牌区域。
- **文本提取**: 将检测到的车牌区域传递给 EasyOCR,从而完成字符识别的任务。

这种组合方式已经被证明在实际场景中非常有效[^2]。

#### 3. Python 实现代码示例

下面是一个完整的 Python 示例程序,展示如何利用 YOLO 和 EasyOCR 来从图片或视频流中识别车牌上的文字。

```python
import cv2
from ultralytics import YOLO
import easyocr
from PIL import Image
import numpy as np

# 初始化 EasyOCR 阅读器对象,指定所需的语言(这里假设为英语)
reader = easyocr.Reader(['en'], gpu=True)

# 加载预训练好的 YOLO 模型
model = YOLO('yolov8n.pt')

def detect_and_read_license_plate(image_path):
    """
    Detects license plates using YOLO and reads text on them via EasyOCR.

    Args:
        image_path (str): Path to input image.

    Returns:
        str: Extracted license plate number or None if no detection occurs.
    """
    img = cv2.imread(image_path)
    results = model.predict(img, conf=0.5)[0]

    for result in results.boxes.data.tolist():
        x1, y1, x2, y2, _, _ = map(int, result)

        # 截取车牌部分作为子图输入至 EasyOCR 中进行进一步处理
        cropped_img = img[y1:y2, x1:x2]
        ocr_result = reader.readtext(cropped_img)

        if not ocr_result:
            continue

        top_prediction = max(ocr_result, key=lambda r:r[-1])  # 获取置信度最高的预测结果
        detected_text, confidence = top_prediction[1], round(top_prediction[2]*100, 2)

        print(f"Detected Text: {detected_text}, Confidence Level: {confidence}%")
        return detected_text

    return None

if __name__ == "__main__":
    test_image = './test_images/licenseplate.jpg'
    extracted_number = detect_and_read_license_plate(test_image)

    if extracted_number is not None:
        print("License Plate Number:",extracted_number)
```

此脚本展示了基本的工作原理以及如何调用相关 API 函数[^3]。

#### 4. 应用领域

该方案可用于多个行业和技术应用场合,比如智能交通管理系统、停车场自动化管理和安防监控等方面[^2]。

---
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值