隶属于:
C:\Users\Public\Documents\MVTec\HALCON-23.05-Progress\examples\hdevelop\OCR\Convolutional-Neural-Networks
此例:一幅图上有小写字母 a-z 和“.”。先提取所有字符区域并转换为外接矩形,裁剪出字符图像后对其进行扭曲,再对扭曲后的图像进行识别:分别用 do_ocr_word_cnn (CharRegion, CharImage, OCRHandleCNN, '[a-z.]', 3, 2, ClassRead, Confidence, ClassCorrectedReadCNN, Score) 和 do_ocr_word_mlp (CharRegion, CharImage, OCRHandleMLP, '[a-z.]', 3, 2, ClassRead, Confidence, ClassCorrectedReadMLP, Score) 识别,再通过比较已知类别与识别出的类别来统计识别错误率。相比之下,CNN 的识别率比 MLP 更高一些,同时耗时也更长一些。
以下是两组结果:
读图
并读入mlpOCR句柄;
read_ocr_class_mlp ('Industrial_NoRej', OCRHandleMLP)
和CNN的OCR句柄
read_ocr_class_cnn ('Universal_NoRej.occ', OCRHandleCNN)
提取字符区域,并形成连通域之后进行排序
初始化类中字符
这里可借鉴:生成包含所有字符的数组
生成20个a,b,…z;
count_obj (Characters, Number)
Length := Number / 27
Classes := []
for J := 0 to 25 by 1
Classes := [Classes,gen_tuple_const(Length,chr(ord('a') + J))]
endfor
Classes := [Classes,gen_tuple_const(Length,'.')]
显示
=扭曲图像的函数:gen_distorted_character 说明==================================
gen_distorted_character (ImageChar, ImageCharDistort)
函数核心内容
* gen_distorted_character (ImageChar : ImageCharDistort : : )
*
* Creates a randomly distorted variant of a single character image.
*   ImageChar        (input)  image of one character
*   ImageCharDistort (output) distorted copy of ImageChar
*
* Steps: random gray-value erosion or dilation, small random rotation and
* translation, domain expansion, and finally addition of smoothed,
* contrast-amplified white noise.
* Because of the rand() calls the result differs from call to call.
get_image_size (ImageChar, Width, Height)
* Random value in the range -2..2 (assuming rand(1) yields a value between
* 0 and 1). Its sign selects erosion or dilation; its magnitude sets the
* mask size, which therefore lies between 1 and 3.
EroDil := rand(1) * 4 - 2
if (EroDil < 0)
    gray_erosion_shape (ImageChar, CharEroDil, 1 - EroDil, 1 - EroDil, 'rhombus')
else
    gray_dilation_shape (ImageChar, CharEroDil, 1 + EroDil, 1 + EroDil, 'rhombus')
endif
* Rotate by a random angle within -10..10 degrees around (Height / 2, Width / 2),
* then shift by a random offset within -1..1 pixel along each axis.
hom_mat2d_identity (HomMat2DIdentity)
hom_mat2d_rotate (HomMat2DIdentity, rad(rand(1) * 20 - 10), Height / 2, Width / 2, HomMat2DRotate)
hom_mat2d_translate (HomMat2DRotate, rand(1) * 2 - 1, rand(1) * 2 - 1, HomMat2DTranslate)
affine_trans_image (CharEroDil, CharTrans, HomMat2DTranslate, 'constant', 'false')
* Expand the domain of the transformed image by 2 pixels.
expand_domain_gray (CharTrans, CharExpanded, 2)
* Build the noise image: constant gray value 128, white noise of amplitude 60,
* Gaussian smoothing, then gray-value scaling g * 3 - 256.
gen_image_proto (CharExpanded, ImageBlank, 128)
add_noise_white (ImageBlank, ImageNoise, 60)
smooth_image (ImageNoise, ImageSmooth, 'gauss', 1)
scale_image (ImageSmooth, ImageScaled, 3, -256)
* Superimpose the noise on the character: (g1 + g2) * 1 - 128.
add_image (CharExpanded, ImageScaled, ImageCharDistort, 1, -128)
return ()
这里先获取图像大小,再生成一个随机数:随机数为负时做灰度腐蚀,否则做灰度膨胀(结构元素大小也由该随机数决定),然后进行随机旋转和平移。各步骤及其输出图像如下:
ImageChar
EroDil := rand(1) * 4 - 2
if (EroDil < 0)
gray_erosion_shape (ImageChar, CharEroDil, 1 - EroDil, 1 - EroDil, 'rhombus')
else
gray_dilation_shape (ImageChar, CharEroDil, 1 + EroDil, 1 + EroDil, 'rhombus')
endif
CharEroDil
hom_mat2d_identity (HomMat2DIdentity)
hom_mat2d_rotate (HomMat2DIdentity, rad(rand(1) * 20 - 10), Height / 2, Width / 2, HomMat2DRotate)
hom_mat2d_translate (HomMat2DRotate, rand(1) * 2 - 1, rand(1) * 2 - 1, HomMat2DTranslate)
affine_trans_image (CharEroDil, CharTrans, HomMat2DTranslate, 'constant', 'false')
CharTrans
expand_domain_gray (CharTrans, CharExpanded, 2)
CharExpanded
gen_image_proto (CharExpanded, ImageBlank, 128)
ImageBlank
add_noise_white (ImageBlank, ImageNoise, 60)
ImageNoise
smooth_image (ImageNoise, ImageSmooth, 'gauss', 1)
ImageSmooth
scale_image (ImageSmooth, ImageScaled, 3, -256)
ImageScaled
add_image (CharExpanded, ImageScaled, ImageCharDistort, 1, -128)
CharExpanded
ImageScaled
ImageCharDistort
先对原字符图像做灰度腐蚀或灰度膨胀,再经随机旋转、平移并扩展定义域,得到图1(CharExpanded);然后生成一张灰度值为128的空白图,加入白噪声,经高斯平滑和灰度缩放(乘3减256)后得到图2(ImageScaled)。将两张图相加并减去128,作为扭曲图像(ImageCharDistort)。
=扭曲图像的函数:gen_distorted_character 说明结束==================================
对选定的字符引入扭曲,将扭曲图存放数组。同步将字符区域也放入数组
使用CNN字体读取扭曲的字符。
使用MLP字体读取扭曲的字符。
字符C
错误数累加后除以扭曲图的总数,得到错误率。
* Compares the pretrained multilayer perceptron (MLP) OCR font with the
* pretrained convolutional neural network (CNN) OCR font on artificially
* distorted characters, reporting mean reading time and error rate for both.
dev_update_off ()
dev_close_window ()
read_image (Image, 'letters')
dev_open_window_fit_image (Image, 0, 0, -1, -1, WindowHandle)
set_display_font (WindowHandle, 16, 'mono', 'true', 'false')
* Load the two pretrained fonts.
read_ocr_class_mlp ('Industrial_NoRej', OCRHandleMLP)
read_ocr_class_cnn ('Universal_NoRej.occ', OCRHandleCNN)
*
* Segment the characters: threshold the dark ink, dilate it before splitting
* into connected components, then intersect each component with the
* undilated ink region and sort the result in reading order.
binary_threshold (Image, InkRegion, 'max_separability', 'dark', InkThreshold)
dilation_circle (InkRegion, InkDilated, 2.5)
connection (InkDilated, InkComponents)
intersection (InkComponents, InkRegion, CharBlobs)
sort_region (CharBlobs, Characters, 'character', 'true', 'row')
*
* Build the ground-truth class of every character.
* NOTE(review): assumes the sorted regions hold the same number of samples
* for each of the 27 classes, ordered 'a'..'z' and then '.' - confirm against
* the input image.
count_obj (Characters, NumChars)
PerClass := NumChars / 27
TrueClasses := []
for Code := ord('a') to ord('z') by 1
    TrueClasses := [TrueClasses,gen_tuple_const(PerClass,chr(Code))]
endfor
TrueClasses := [TrueClasses,gen_tuple_const(PerClass,'.')]
*
* Show the original, undistorted characters.
dev_display (Image)
dev_set_colored (12)
dev_set_draw ('margin')
dev_set_shape ('rectangle1')
dev_display (Characters)
dev_disp_text (['Read distorted variants of the shown characters', 'with pretrained MLP and CNN fonts.'], 'window', 12, 12, 'black', [], [])
dev_disp_text ('Press Run (F5) to continue', 'window', 'bottom', 'right', 'black', [], [])
stop ()
set_window_param (WindowHandle, 'flush', 'false')
*
* Distort every character several times and accumulate the error counts
* and classification times of both fonts.
NumDistort := 10
ErrorsCNN := 0
ErrorsMLP := 0
TimeCNN := 0
TimeMLP := 0
NumRead := 0
for ObjIdx := 1 to |TrueClasses| by 1
    * Crop the character with a padded bounding box.
    select_obj (Characters, CharSel, ObjIdx)
    shape_trans (CharSel, CharBox, 'rectangle1')
    dilation_rectangle1 (CharBox, CharBoxPadded, 5, 5)
    reduce_domain (Image, CharBoxPadded, ImagePadded)
    crop_domain (ImagePadded, ImageChar)
    Expected := TrueClasses[ObjIdx - 1]
    get_image_size (ImageChar, CharWidth, CharHeight)
    *
    * Create the distorted variants and their segmented regions up front so
    * that both fonts read exactly the same inputs.
    gen_empty_obj (DistortRegions)
    gen_empty_obj (DistortImages)
    for Trial := 1 to NumDistort by 1
        gen_distorted_character (ImageChar, ImageCharDistort)
        concat_obj (DistortImages, ImageCharDistort, DistortImages)
        binary_threshold (ImageCharDistort, DistortRegion, 'max_separability', 'dark', DistortThreshold)
        concat_obj (DistortRegions, DistortRegion, DistortRegions)
    endfor
    *
    * Read the distorted characters with the CNN font.
    ErrorsBeforeCNN := ErrorsCNN
    count_seconds (StartCNN)
    for Trial := 1 to NumDistort by 1
        select_obj (DistortImages, TrialImage, Trial)
        select_obj (DistortRegions, TrialRegion, Trial)
        do_ocr_word_cnn (TrialRegion, TrialImage, OCRHandleCNN, '[a-z.]', 3, 2, RawClassCNN, ConfidenceCNN, WordCNN, ScoreCNN)
        if (WordCNN != Expected)
            ErrorsCNN := ErrorsCNN + 1
        endif
    endfor
    count_seconds (StopCNN)
    * Red marks a font that misread at least one variant of this character.
    if (ErrorsCNN != ErrorsBeforeCNN)
        ColorCNN := 'red'
    else
        ColorCNN := 'forest green'
    endif
    TimeCNN := TimeCNN + StopCNN - StartCNN
    *
    * Read the same distorted characters with the MLP font.
    ErrorsBeforeMLP := ErrorsMLP
    count_seconds (StartMLP)
    for Trial := 1 to NumDistort by 1
        select_obj (DistortImages, TrialImage, Trial)
        select_obj (DistortRegions, TrialRegion, Trial)
        do_ocr_word_mlp (TrialRegion, TrialImage, OCRHandleMLP, '[a-z.]', 3, 2, RawClassMLP, ConfidenceMLP, WordMLP, ScoreMLP)
        if (WordMLP != Expected)
            ErrorsMLP := ErrorsMLP + 1
        endif
    endfor
    count_seconds (StopMLP)
    if (ErrorsMLP != ErrorsBeforeMLP)
        ColorMLP := 'red'
    else
        ColorMLP := 'forest green'
    endif
    TimeMLP := TimeMLP + StopMLP - StartMLP
    *
    * Show the last distorted variant together with the running statistics.
    dev_set_part (0, 0, CharHeight - 1, CharWidth - 1)
    dev_display (ImageCharDistort)
    dev_set_part (0, 0, 499, 499)
    NumRead := NumRead + NumDistort
    MeanMsCNN := TimeCNN / NumRead * 1000
    MeanMsMLP := TimeMLP / NumRead * 1000
    ErrorPctCNN := real(ErrorsCNN) / NumRead * 100
    ErrorPctMLP := real(ErrorsMLP) / NumRead * 100
    Report := ['Classifier Mean reading time Errors',' CNN ' + MeanMsCNN$'15.2f' + ' ms ' + ErrorPctCNN$'15.2f' + ' %',' MLP ' + MeanMsMLP$'15.2f' + ' ms ' + ErrorPctMLP$'15.2f' + ' %']
    dev_disp_text (Report, 'window', 'top', 'left', ['black',ColorCNN,ColorMLP], [], [])
    flush_buffer (WindowHandle)
endfor
*
* Show the final statistics.
dev_clear_window ()
set_window_param (WindowHandle, 'flush', 'true')
Report := ['Final result:\n',Report]
dev_disp_text (Report, 'window', 'top', 'left', 'white', 'box', 'false')
stop ()