One of my recent courses required a big term project: implement one task with three different approaches, namely a feature-based method, a traditional machine-learning method, and a deep-learning method.
Since my own ability is limited, I went with the relatively simple task of license plate recognition.
All three implementations borrow from existing work, but a lot of the code found online does not run as posted, so I made some fixes and am writing them up here for future reference.
License plate recognition starts with segmenting the plate region.
Reference: https://blog.youkuaiyun.com/u011808673/article/details/78510692
First, the plate and character segmentation code:
// headers needed if this file is compiled on its own
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc/types_c.h>
#include <iostream>
#include <sstream>
#include <vector>
using namespace cv;
using namespace std;

int segmentation()
{
Mat origin_image = imread("2.jpg");
Mat gray_image;
cvtColor(origin_image, gray_image, CV_RGB2GRAY);
Mat canny_image;
Canny(gray_image, canny_image, 100, 200, 3);
Mat dilate_image, erode_image;
Mat elementX = getStructuringElement(MORPH_RECT, Size(19, 1));
Mat elementY = getStructuringElement(MORPH_RECT, Size(1, 20));
Point point(-1, -1);
dilate(canny_image, dilate_image, elementX, point, 2);
erode(dilate_image, erode_image, elementX, point, 4);
dilate(erode_image, dilate_image, elementX, point, 2);
erode(dilate_image, erode_image, elementY, point, 2);
dilate(erode_image, dilate_image, elementY, point, 2);
Mat blurr_image;
medianBlur(dilate_image, blurr_image, 15);
medianBlur(blurr_image, blurr_image, 15);
Mat contour_image;
contour_image = blurr_image.clone();
vector<vector<Point>> contours;
findContours(contour_image, contours, CV_RETR_EXTERNAL, CHAIN_APPROX_SIMPLE);
drawContours(contour_image, contours, -1, Scalar(255, 0, 0), 1);
Mat roi_image;
vector<Point> rectPoint;
for (int i = 0; i < contours.size(); i++)
{
Rect r = boundingRect(Mat(contours[i]));
cout << "contours" << i << " height=" << r.height << " width="
<< r.width << " rate=" << ((float)r.width / r.height) << endl;
if ((float)r.width / r.height >= 3 && (float)r.width / r.height <= 5)
{
cout << "r.x = " << r.x << " r.y = " << r.y << endl;
rectangle(contour_image, r, Scalar(0, 0, 255), 2);
Point p1, p2, p3, p4;
p1.x = r.x;
p1.y = r.y;
p2.x = r.x + r.width;
p2.y = r.y;
p3.x = r.x + r.width;
p3.y = r.y + r.height;
p4.x = r.x;
p4.y = r.y + r.height;
rectPoint.push_back(p1);
rectPoint.push_back(p2);
rectPoint.push_back(p3);
rectPoint.push_back(p4);
for (int j = 0; j < contours[i].size(); j++)
{
cout << "point = " << contours[i][j] << endl;
}
roi_image = origin_image(r);
}
}
imwrite("roi_image.jpg", roi_image);
Mat large_image;
int col = roi_image.cols, row = roi_image.rows;
resize(roi_image, large_image, Size(300, 300 * row / col));
Mat roi_gray_image;
cvtColor(large_image, roi_gray_image, CV_RGB2GRAY);
Mat canny_roi_image;
Canny(roi_gray_image, canny_roi_image, 100, 200, 3);
imwrite("canny_roi_image.jpg", canny_roi_image);
Mat roi_contours_image;
vector<vector<Point>> roi_contours;
roi_contours_image = canny_roi_image.clone();
findContours(roi_contours_image, roi_contours, CV_RETR_EXTERNAL,
CHAIN_APPROX_SIMPLE);
vector<Point> roi_rectPoint;
for (int i = 0; i < roi_contours.size(); i++)
{
Rect r = boundingRect(Mat(roi_contours[i]));
cout << "contours" << i << " height=" << r.height
<< " width=" << r.width << " rate = "
<< ((float)r.width / r.height) << endl;
cout << "r.x = " << r.x << " r.y = " << r.y << endl;
Point p1, p2, p3, p4;
p1.x = r.x;
p1.y = r.y;
p2.x = r.x + r.width;
p2.y = r.y;
p3.x = r.x + r.width;
p3.y = r.y + r.height;
p4.x = r.x;
p4.y = r.y + r.height;
roi_rectPoint.push_back(p1);
roi_rectPoint.push_back(p2);
roi_rectPoint.push_back(p3);
roi_rectPoint.push_back(p4);
for (int j = 0; j < roi_contours[i].size(); j++) {
cout << "point = " << roi_contours[i][j] << endl;
}
}
int contours_height[30], contours_width[30];
for (int i = 0; i < roi_contours.size(); i++) {
Rect r = boundingRect(Mat(roi_contours[i]));
contours_height[i] = r.height;
contours_width[i] = r.width;
cout << "contours_height = " << r.height << " contours_width = "
<< r.width << endl;
}
int roi_col = canny_roi_image.cols, roi_row = canny_roi_image.rows,
position1[50], position2[50], roi_width[50];
uchar pix;
int pixcol[1000];
for (int i = 0; i < roi_col; i++)
{
pixcol[i] = 0;
for (int j = 0; j < roi_row; j++)
{
pix = canny_roi_image.at<uchar>(j, i);
if (pix > 0)
{
pixcol[i] = 1;
break;
}
}
} // mark the columns that contain edge pixels
for (int i = 2; i < roi_col - 2; i++)
{
if ((pixcol[i - 2] + pixcol[i - 1] + pixcol[i + 1] + pixcol[i + 2]) >= 3)
{
pixcol[i] = 1;
}
else if ((pixcol[i - 2] + pixcol[i - 1] + pixcol[i + 1] + pixcol[i + 2]) <= 1)
{
pixcol[i] = 0;
}
} // smooth the column-projection array
int count = 0;
bool flag = false;
for (int i = 0; i < roi_col - 1; i++)
{
pix = pixcol[i];
if (pix == 1 && !flag)
{
flag = true;
position1[count] = i;
continue;
}
if (pix == 0 && flag)
{
flag = false;
position2[count] = i;
count++;
}
} // determine where each character starts and ends
// record the width of every character segment
for (int i = 0; i < count; i++)
{
cout << "position1 = " << position1[i] << " position2 = " << position2[i]
<< " distance =" << (position2[i] - position1[i]) << endl;
roi_width[i] = position2[i] - position1[i];
}
// find the widest and narrowest segments
int max = roi_width[0], max_index = 0;
int min = roi_width[0], min_index = 0;
for (int n = 1; n < count; n++)
{
if (max < roi_width[n])
{
max = roi_width[n];
max_index = n;
}
if (min > roi_width[n])
{
min = roi_width[n];
min_index = n;
}
}
int index = 0;
int new_roi_width[50] = { 0 };
// copy all widths except the widest and the narrowest one
for (int i = 0; i < count; i++)
{
if (i != min_index && i != max_index)
{
new_roi_width[index] = roi_width[i];
index++;
}
}
int average, sum = 0;
for (int i = 0; i < count - 2; i++)
{
sum += new_roi_width[i];
}
average = (count > 2) ? sum / (count - 2) : 0; // average width, ignoring the two extremes
int licenseX[10] = { 0 }, licenseW[10] = { 0 }, licenseNum = 0;
for (int i = 0; i < count; i++)
{
if (roi_width[i] > (average - 8) && roi_width[i] < (average + 8))
{
licenseX[licenseNum] = position1[i];
licenseW[licenseNum] = roi_width[i];
cout << "licenseX = " << licenseX[licenseNum] << " roi_width = "
<< licenseW[licenseNum] << endl;
licenseNum++;
}
}
int position3 = 0, position4 = roi_row;
int licenseY[10] = { 0 }, licenseH[10] = { 0 }, licenseNum2 = 0;
for (int i = 0; i < count && licenseNum2 < 10; i++)
{
licenseY[licenseNum2] = position3;
licenseH[licenseNum2] = position4;
cout << "licenseY = " << licenseY[licenseNum2] << " roi_height = "
<< licenseH[licenseNum2] << endl;
licenseNum2++;
}
// crop each character and write it to its own file
Mat licenseN;
for (int i = 0; i < 7 && i < licenseNum; i++)
{
Rect rect(licenseX[i], licenseY[i], licenseW[i], licenseH[i]);
cout << "position = " << licenseX[i] << " " << licenseY[i] << " "
<< licenseW[i] << " " << licenseH[i] << endl;
licenseN = large_image(rect);
//imshow("test1"+i, licenseN);
ostringstream oss;
oss << "licenseN" << i << ".jpg";
imwrite(oss.str(), licenseN);
}
cout << "license plate process" << endl;
return 0;
}
The experimental results are as follows (result images omitted).
Here is a small binarization routine as well:
Mat binarization(Mat src)
{
Mat gray_image;
cvtColor(src, gray_image, CV_RGB2GRAY);
medianBlur(gray_image, gray_image, 3);
//imshow("gray", gray_image);
Mat binary_image;
threshold(gray_image, binary_image, 210, 255, CV_THRESH_BINARY);
//imshow("test", binary_image);
return binary_image;
}
Many posts online use grayscale template feature values like the table below. Personally I doubt they are very robust, but to be honest I could not come up with anything better with my limited ability, so this is what I went with for now.
#define TEMPLETENUM 43
#define CHARACTER 15
#include<opencv2/opencv.hpp>
#include <opencv2/imgproc/types_c.h>
#include<opencv2/core/core.hpp>
#include<opencv2/highgui/highgui.hpp>
#include <opencv2/ml/ml.hpp>
#include<iostream>
#include<sstream>
using namespace std;
using namespace cv;
const int Num_Templete[TEMPLETENUM][CHARACTER] =
{
{16,19,10,12,10,10,15,18,110,3,2,2,3,3,3}, //0
{9,11,10,10,10,10,9,10,79,2,2,2,0,2,12}, //1
{20,21,3,18,10,10,23,22,123,4,2,2,7,6,8}, //2
{19,21,11,14,4,20,18,22,129,2,2,4,6,6,7}, //3
{2,18,11,22,20,21,11,18,123,2,4,2,6,7,5}, //4
{23,19,20,12,9,20,18,22,143,2,4,4,6,6,6}, //5
{6,13,17,8,15,20,18,20,117,2,2,4,5,7,6}, //6
{21,21,0,20,8,12,9,11,102,2,2,2,2,8,15}, //7
{17,18,18,19,14,20,17,20,143,4,2,4,6,6,6}, //8
{20,21,21,22,7,19,13,7,116,3,3,3,8,8,8}, //9
{10,10,16,16,20,20,18,19,129,2,4,2,8,3,6}, //A
{24,20,20,19,22,22,24,20,171,4,8,4,6,6,6}, //B
{18,19,20,4,20,8,17,21,127,3,2,4,4,4,4}, //C
{23,19,11,20,12,20,22,21,148,3,3,3,4,4,4}, //D
{21,19,21,20,22,21,23,23,160,4,4,4,8,8,8}, //E
{21,20,20,20,22,21,19,0,120,4,4,4,7,8,7}, //F
{17,18,22,14,12,24,18,21,146,4,7,4,4,6,6}, //G
{14,20,18,22,17,22,16,20,149,4,1,4,2,2,2}, //H
{0,17,0,20,3,20,18,22,100,2,2,4,2,2,2}, //J
{19,20,26,10,20,20,20,22,157,4,4,4,3,5,11}, //K
{20,0,20,0,20,0,25,20,105,2,2,2,2,2,2}, //L
{20,10,27,17,20,10,22,14,140,1,3,3,4,1,5}, //M
{21,12,25,17,26,12,18,18,149,3,5,3,5,5,6}, //N
{23,19,18,20,21,8,22,0,131,3,3,2,4,4,4}, //P
{18,19,20,10,26,15,18,21,147,3,3,4,5,7,5}, //Q
{26,19,21,18,21,17,20,21,163,4,3,4,4,6,5}, //R
{18,18,18,10,8,17,17,22,128,4,3,4,6,6,6}, //S
{22,18,10,10,10,10,10,10,100,2,2,2,33,2,2}, //T
{18,12,20,10,20,10,19,21,130,3,3,3,2,2,2}, //U
{20,19,20,20,15,14,9,10,127,4,4,2,9,1,8}, //V
{21,25,26,28,16,16,21,19,172,6,2,4,13,0,7}, //W
{21,21,13,13,12,11,22,21,134,4,2,4,8,0,10}, //X
{21,20,10,11,10,10,10,11,103,3,2,2,5,2,6}, //Y
{21,23,5,15,15,5,24,20,128,2,2,2,8,8,7}, //Z
{13,14,10,10,10,10,13,13,93,2,2,2,29,2,29}, //I
{20,20,13,20,19,12,17,20,141,3,3,4,4,4,4}, //O
{14,15,17,17,16,10,25,24,138,0,2,4,12,8,9}, //云
{17,20,17,12,33,28,23,20,170,3,4,7,13,6,4}, //苏
{21,21,23,24,24,25,31,27,196,0,9,6,8,6,7}, //京
{19,27,20,34,19,36,24,37,216,4,4,7,13,28,3}, //湘
{17,14,23,27,36,40,26,27,210,4,13,4,16,14,14}, //鲁
{24,24,32,38,34,32,17,22,223,9,6,10,11,12,9}, // 粤
{22,20,33,37,25,24,24,25,210,13,3,6,12,8,7}, //蒙
};
// license plate character set
const char *PlateCode[TEMPLETENUM] =
{
"0", "1", "2", "3", "4" ,
"5","6", "7", "8", "9",
"A", "B", "C", "D","E",
"F", "G","H", "J", "K",
"L", "M", "N","P", "Q",
"R", "S", "T", "U", "V",
"W","X", "Y", "Z", "I", "O",
"云", "苏","京", "湘", "鲁","粤","蒙"
};
const char *G_PlateChar[7];
void charRecognize(Mat src, int num, int char_num) // num: position of the character within the plate, char_num: index at which to store the result
{
int k, i, j;
int char_begin, char_end;
int num_t[CHARACTER] = { 0 };
switch (num)
{
case 0:char_begin = 36; char_end = 42; break;
case 1:char_begin = 10; char_end = 35; break;
case 2:char_begin = 0; char_end = 35; break;
case 3:char_begin = 0; char_end = 35; break;
case 4:char_begin = 0; char_end = 35; break;
case 5:char_begin = 0; char_end = 35; break;
case 6:char_begin = 0; char_end = 35; break;
default:char_begin = 0; char_end = 42; break;
}
for (k = 0; k < 8; k++)
{
for (j = int(k / 2) * 10; j<int(k / 2 + 1) * 10; j++)
{
for (i = (k % 2) * 10; i < (k % 2 + 1) * 10; i++)
{
num_t[k]+= src.at<uchar>(j, i);
}
}
num_t[8] += num_t[k]; // feature 9 is the sum of the first 8 block features
}
for (i = 0; i < 20; i++) // the remaining features come from fixed row/column scans
num_t[9] += src.at<uchar>(10, i);
for (i = 0; i < 20; i++)
num_t[10] += src.at<uchar>(20, i);
for (i = 0; i < 20; i++)
num_t[11] += src.at<uchar>(30, i);
for (j = 0; j < 40; j++)
num_t[12] += src.at<uchar>(j, 7);
for (j = 0; j < 40; j++)
num_t[13] += src.at<uchar>(j, 10);
for (j = 0; j < 40; j++)
num_t[14] += src.at<uchar>(j, 13);
for (int i = 0; i < CHARACTER; i++)
{
num_t[i] /= 255*4;
}
int matchnum = 0; // match score for the current template
int matchnum_max = 0;
int matchcode = 0; // index of the best-matching template
for (k = char_begin; k <= char_end; k++)
{
matchnum = 0;
for (i = 0; i < 8; i++) // match the 8 block features
{
if (abs(num_t[i] - Num_Templete[k][i]) <= 2) // compare with the template value
matchnum++; // a difference of at most 2 counts as a match
}
if (abs(num_t[8] - Num_Templete[k][8]) <= 8) // match feature 9 (the block sum)
matchnum += 2;
for (i = 9; i < CHARACTER; i++) // match the row/column scan features
{
if (Num_Templete[k][i] >= 5) // larger template values get a tolerance of 1
{
if (abs(num_t[i] - Num_Templete[k][i]) <= 1)
matchnum += 2;
}
else if (num_t[i] == Num_Templete[k][i])
{
matchnum += 2;
}
}
if (matchnum > matchnum_max)
{
matchnum_max = matchnum; // keep the best score so far
matchcode = k; // remember which template matched best
}
}
// print the feature vector and store the recognized character
cout << "templete: ";
for (i = 0; i < 15; i++)
{
cout << num_t[i] << " ";
}
cout << endl;
G_PlateChar[char_num] = PlateCode[matchcode]; // store the recognized character
cout << G_PlateChar[char_num] << endl;
}
//1. The first 8 features: the normalized character image is split into a 4-row by 2-column grid of 10x10 blocks, and the grayscale sum of each block is taken.
//2. Feature 9 is the sum of the first 8 features.
//3. Features 10-12 are the grayscale sums of row 10, row 20 and row 30.
//4. Features 13-15 are the grayscale sums of column 7, column 10 and column 13.
The main function is as follows:
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc/types_c.h>
#include<opencv2/core/core.hpp>
#include<opencv2/highgui/highgui.hpp>
#include<iostream>
#include<sstream>
using namespace cv;
using namespace std;
int segmentation();
void charRecognize(Mat src, int num, int char_num);
Mat binarization(Mat src);
int main()
{
segmentation();
for (int i = 0; i < 7; i++)
{
ostringstream oss;
oss << "licenseN" << i << ".jpg";
Mat binary_image = binarization(imread(oss.str()));
ostringstream oss1;
oss1 << "test" << i << ".bmp";
imwrite(oss1.str(), binary_image);
}
for (int i = 0; i < 7; i++)
{
ostringstream oss;
oss << "licenseN" << i << ".jpg";
Mat image = imread(oss.str());
Mat gray_image;
cvtColor(image, gray_image, CV_RGB2GRAY);
Mat gray_image_resize;
resize(gray_image, gray_image_resize, Size(20,40));
charRecognize(gray_image_resize,i,i);
}
waitKey();
}
Next is the support vector machine approach. The dataset is tf_car_license_dataset; a quick Baidu search should turn up a netdisk link for it.
Before using the SVM, the image data first has to be converted to CSV format.
import csv
import os
import numpy as np
from PIL import Image

def convert_img_to_csv(img_dir):
    # path of the CSV file to write
    with open(r"D:\license_recognition\svm\convert2csv_validation_chinese-characters.csv", "w", newline="") as f:
        # column names: the label plus one column per pixel of a 32x40 image
        column_name = ["label"]
        column_name.extend(["pixel%d" % i for i in range(32 * 40)])
        # write the header row
        writer = csv.writer(f)
        writer.writerow(column_name)
        # the image directory contains one sub-directory per class, named 0 to 5
        for i in range(6):
            # path of the class directory
            img_temp_dir = os.path.join(img_dir, str(i))
            # all files in that directory
            img_list = os.listdir(img_temp_dir)
            for img_name in img_list:
                img_path = os.path.join(img_temp_dir, img_name)
                # skip anything that is itself a directory
                if not os.path.isdir(img_path):
                    # the images are black and white, so read them as grayscale
                    img = Image.open(img_path).convert("L")
                    # label first, then the flattened pixel values
                    row_data = [i]
                    img_flatten = np.array(img, dtype=np.uint8).flatten()
                    row_data.extend(img_flatten)
                    writer.writerow(row_data)

if __name__ == "__main__":
    # convert the images under this directory into a CSV file
    convert_img_to_csv(r"D:\license_recognition\tf_car_license_dataset\tf_car_license_dataset\train_images\validation-set\chinese-characters")
You should now see the generated CSV file in the target folder. Note that the Chinese-character images and the letter/digit images have to be converted separately; just change the paths and run the script again, as sketched below.
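For example, a slightly generalized version of the conversion function makes the re-runs easier. This is a sketch of my own: the output CSV path and the number of class sub-directories are parameters I added, and the call at the bottom just repeats the Chinese-character validation set; point it at the other image directories of your copy of the dataset as needed.
def convert_img_dir(img_dir, csv_path, num_classes):
    # same logic as convert_img_to_csv above, but the output CSV path and the
    # number of class sub-directories (named 0 .. num_classes-1) are parameters
    with open(csv_path, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["label"] + ["pixel%d" % i for i in range(32 * 40)])
        for label in range(num_classes):
            class_dir = os.path.join(img_dir, str(label))
            for img_name in os.listdir(class_dir):
                img_path = os.path.join(class_dir, img_name)
                if os.path.isdir(img_path):
                    continue
                img = Image.open(img_path).convert("L")
                writer.writerow([label] + list(np.array(img, dtype=np.uint8).flatten()))

# example call; adjust the directory, CSV name and class count to the set being converted
convert_img_dir(r"D:\license_recognition\tf_car_license_dataset\tf_car_license_dataset\train_images\validation-set\chinese-characters",
                r"D:\license_recognition\svm\convert2csv_validation_chinese-characters.csv", 6)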
Below is the SVM training code:
import pandas as pd                    # read the csv files
from sklearn import svm                # SVM
import joblib                          # save the model (sklearn.externals.joblib was removed in newer scikit-learn)
from sklearn.decomposition import PCA  # dimensionality reduction
import time                            # measure training time

if __name__ == "__main__":
    train_num = 4286
    validation_num = 201
    data_train = pd.read_csv('convert2csv.csv')
    data_validation = pd.read_csv('convert2csv_validation.csv')
    train_data = data_train.values[0:train_num, 1:]
    train_label = data_train.values[0:train_num, 0]
    validation_data = data_validation.values[0:validation_num, 1:]
    validation_label = data_validation.values[0:validation_num, 0]
    t = time.time()
    # PCA dimensionality reduction
    pca = PCA(n_components=0.8, whiten=True)
    print('start pca...')
    train_x = pca.fit_transform(train_data)
    validation_x = pca.transform(validation_data)
    print(train_x.shape)
    # SVM training
    print('start svc...')
    svc = svm.SVC(kernel='rbf', C=34)
    svc.fit(train_x, train_label)
    pre = svc.predict(validation_x)
    # save the model and the fitted PCA
    joblib.dump(svc, 'model.m')
    joblib.dump(pca, 'pca.m')
    # accuracy on the validation set
    score = svc.score(validation_x, validation_label)
    print(u'accuracy: %f, time: %.2fs' % (score, time.time() - t))
The Chinese-character classifier is trained the same way: swap in the Chinese-character CSV files, adjust the sample counts, and save the model under different names, for example as sketched below.
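Concretely, a small helper like the one below can train both models. This is my own refactoring of the script above, not part of the original post; the Chinese-character CSV and model file names in the last call are placeholders I picked.
import pandas as pd
import joblib
from sklearn import svm
from sklearn.decomposition import PCA

def train_svm(train_csv, validation_csv, model_path, pca_path, C=34):
    # read the CSVs produced by the conversion step: column 0 is the label, the rest are pixels
    data_train = pd.read_csv(train_csv)
    data_validation = pd.read_csv(validation_csv)
    train_data, train_label = data_train.values[:, 1:], data_train.values[:, 0]
    val_data, val_label = data_validation.values[:, 1:], data_validation.values[:, 0]
    # PCA followed by an RBF SVM, same as above
    pca = PCA(n_components=0.8, whiten=True)
    train_x = pca.fit_transform(train_data)
    val_x = pca.transform(val_data)
    svc = svm.SVC(kernel='rbf', C=C)
    svc.fit(train_x, train_label)
    joblib.dump(svc, model_path)
    joblib.dump(pca, pca_path)
    print('accuracy:', svc.score(val_x, val_label))

# letters/digits model, then a separate Chinese-character model (placeholder file names)
train_svm('convert2csv.csv', 'convert2csv_validation.csv', 'model.m', 'pca.m')
train_svm('convert2csv_chinese.csv', 'convert2csv_validation_chinese-characters.csv',
          'model_chinese.m', 'pca_chinese.m')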
Here is a test script:
import joblib   # sklearn.externals.joblib was removed in newer scikit-learn
import numpy as np
from PIL import Image

if __name__ == "__main__":
    LETTERS_DIGITS = ("0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
                      "A", "B", "C", "D", "E", "F", "G", "H", "J", "K",
                      "L", "M", "N", "P", "Q", "R", "S", "T", "U", "V",
                      "W", "X", "Y", "Z")
    result = []
    # load the trained SVM and the fitted PCA once, outside the loop
    svc = joblib.load("model.m")
    pca = joblib.load("pca.m")
    print('start pca...')
    # test1.bmp ... test6.bmp are the non-Chinese characters produced by the C++ segmentation
    for i in range(1, 7):
        img = Image.open(r'D:\license_recognition\Project2\Project2\test%s.bmp' % i).convert("L")
        img = img.resize((32, 40), Image.LANCZOS)  # ANTIALIAS was removed in newer Pillow
        img = np.array(img, dtype=np.uint8).flatten()
        test = img.reshape(1, 1280)
        test_x = pca.transform(test)
        pre = svc.predict(test_x)
        result.append(LETTERS_DIGITS[int(pre[0])])
    print("predicted characters:", result)
Source: https://blog.youkuaiyun.com/Foreverllove/article/details/80791604
Finally, the CNN approach.
Source: https://blog.youkuaiyun.com/ShadowN1ght/article/details/78571187
That post is written in great detail, so I will not repeat it here.
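For completeness, here is a minimal sketch of the kind of small CNN that can be trained on the same 32x40 character images. This is my own simplification using tf.keras, not the code from the linked post; NUM_CLASSES and the CSV file name are assumptions to adjust to your own data.
# a minimal CNN sketch (my own simplification, not the linked post's code)
import pandas as pd
import tensorflow as tf

NUM_CLASSES = 34  # assumption: digits plus letters without I and O; adjust to your label set

# reuse the CSV produced earlier: column 0 is the label, the rest are the 32x40 pixels
data = pd.read_csv('convert2csv.csv')
x = data.values[:, 1:].reshape(-1, 40, 32, 1).astype('float32') / 255.0
y = data.values[:, 0].astype('int64')

model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(16, 3, activation='relu', input_shape=(40, 32, 1)),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(32, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(NUM_CLASSES, activation='softmax'),
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x, y, epochs=10, batch_size=32, validation_split=0.1)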