数字验证码的识别

本文介绍了如何识别规范的数字验证码,主要方法是对样本进行比对,通过保存的字模与待识别图片进行比较,提取并处理图像数据,最终确定最接近的匹配结果。代码包括图像数据提取、字模初始化和匹配检查等步骤。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

转自:http://www.blogjava.net/hadeslee/archive/2007/09/28/148814.html

数字验证码在很多地方都会用到。我前段时间也写过一篇有关生成验证码的文章,那是随机生成大小不一、颜色不一、形状不一的数字图片;本文主要针对那些比较规范的验证码的识别。何谓规范?规范就是数字的大小几乎一致,颜色对比度挺高,没什么干扰线。识别的依据就是最最最基础的办法——比对:先取样,保存成字模,再用字模去和将要识别的图片进行比较,取最接近的那个结果。不过在比较之前,必须先把图片里面的数据提取出来,并适当地去除一些干扰。
下面就是识别部分的代码:

/*
* ImageCode.java
*
* Created on 2007年1月18日, 下午10:00
*
* To change this template, choose Tools | Template Manager
* and open the template in the editor.
*/
package net.bccn.hadeslee.programfan;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StreamTokenizer;
import java.net.URL;
import javax.imageio.ImageIO;
/**
 * Recognizes a four-digit numeric CAPTCHA image by template matching.
 *
 * <p>The image is cut into four 16x13 pixel cells, left to right, starting at
 * the top-left corner. Each cell is turned into a 0/1 array (0 for a pure
 * white pixel, 1 for anything else) and compared against stored templates.
 * Every digit 0-9 has five templates (center, left, right, up, down), each
 * 208 values long, loaded once from the class-path resources
 * {@code /net/bccn/hadeslee/model/programfan_<digit>.mod}.
 *
 * <p>Instances keep the image currently being processed in a field, so one
 * instance must not be used from several threads at the same time.
 *
 * @author hadeslee
 */
public class ImageCode {
    /** Number of digits read from one image. */
    private static final int DIGIT_COUNT = 4;
    /** Width in pixels of one digit cell. */
    private static final int CELL_WIDTH = 16;
    /** Height in pixels of one digit cell. */
    private static final int CELL_HEIGHT = 13;
    /** Number of pixels in one digit cell (16 * 13 = 208). */
    private static final int CELL_PIXELS = CELL_WIDTH * CELL_HEIGHT;
    /** Value returned by {@code getRGB} for an opaque white pixel (0xFFFFFFFF). */
    private static final int WHITE = -1;
    /**
     * Section names accepted in a template file; the array index is the first
     * index into {@link #model}.
     */
    private static final String[] POSITION_NAMES = {"center", "left", "right", "up", "down"};

    /** Image currently being recognized; set by {@link #getNumber(BufferedImage)}. */
    private BufferedImage bi;

    /** Templates, indexed as {@code model[position][digit][pixel]}. */
    private static final int[][][] model = new int[POSITION_NAMES.length][10][CELL_PIXELS];

    // Load the templates once, when the class is initialized.
    static {
        initNumModel();
    }

    /**
     * Creates a new instance of ImageCode.
     *
     * <p>The templates are shared static data already loaded by the static
     * initializer, so nothing is reloaded here.
     */
    public ImageCode() {
    }

    /**
     * Decodes an image from the stream and recognizes its four digits.
     *
     * @param is stream containing an image readable by {@link ImageIO}
     * @return the recognized digits, or an empty string when the stream is
     *         {@code null}, cannot be decoded, or the image is too small
     */
    public String getNumber(InputStream is) {
        if (is == null) {
            return "";
        }
        try {
            // ImageIO.read yields null for an unsupported format; the overload
            // below maps that to the empty string.
            return getNumber(ImageIO.read(is));
        } catch (IOException | RuntimeException exe) {
            exe.printStackTrace();
            return "";
        }
    }

    /**
     * Recognizes the four digits in the given image.
     *
     * @param bi image at least 64 pixels wide and 13 pixels high
     * @return one decimal number per cell, concatenated ({@code -1} is appended
     *         for a cell in which every pixel differs from every template), or
     *         an empty string when the image is {@code null}, too small, or
     *         processing fails
     */
    public String getNumber(BufferedImage bi) {
        if (bi == null
                || bi.getWidth() < DIGIT_COUNT * CELL_WIDTH
                || bi.getHeight() < CELL_HEIGHT) {
            return "";
        }
        try {
            this.bi = bi;
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < DIGIT_COUNT; i++) {
                sb.append(doCheck(getData(i)));
            }
            return sb.toString();
        } catch (RuntimeException exe) {
            // Best effort, as before: report the failure and return no result.
            exe.printStackTrace();
            return "";
        }
    }

    /**
     * Loads the templates for the digits 0-9 from the class path.
     *
     * <p>Each digit is handled on its own: a missing or unreadable resource is
     * reported on standard error and that digit's templates stay all zero,
     * while the remaining digits are still loaded.
     */
    private static void initNumModel() {
        for (int i = 0; i < 10; i++) {
            String resource = "/net/bccn/hadeslee/model/programfan_" + i + ".mod";
            try (InputStream in = ImageCode.class.getResourceAsStream(resource)) {
                if (in == null) {
                    System.err.println("ImageCode: template resource not found: " + resource);
                    continue;
                }
                // Template files hold only ASCII digits, commas, '#' and section names.
                loadModel(i, new StreamTokenizer(new InputStreamReader(in, "UTF-8")));
            } catch (IOException | RuntimeException exe) {
                exe.printStackTrace();
            }
        }
    }

    /**
     * Parses one template file into {@link #model}.
     *
     * <p>The file is a sequence of sections: a name (one of
     * {@link #POSITION_NAMES}) followed by numbers. '#' and ',' are treated as
     * whitespace. Numbers beyond the 208th of a section are ignored.
     *
     * @param digit digit (0-9) whose templates are being read
     * @param st    tokenizer positioned at the start of the file
     * @throws IOException if reading from the tokenizer fails
     */
    private static void loadModel(int digit, StreamTokenizer st) throws IOException {
        st.whitespaceChars('#', '#');
        st.whitespaceChars(',', ',');
        st.eolIsSignificant(false);
        int token;
        while ((token = st.nextToken()) != StreamTokenizer.TT_EOF) {
            if (token != StreamTokenizer.TT_WORD) {
                continue;
            }
            int who = positionIndex(st.sval);
            int index = 0;
            while (st.nextToken() == StreamTokenizer.TT_NUMBER) {
                if (index < CELL_PIXELS) {
                    model[who][digit][index++] = (int) st.nval;
                }
            }
            // The token that ended the number run is the next section name (or EOF).
            st.pushBack();
        }
    }

    /**
     * Maps a section name to its position index.
     *
     * @param name section name read from a template file
     * @return index of {@code name} in {@link #POSITION_NAMES}; 0 (center) for
     *         an unrecognized name, matching the previous behavior
     */
    private static int positionIndex(String name) {
        for (int p = 0; p < POSITION_NAMES.length; p++) {
            if (POSITION_NAMES[p].equals(name)) {
                return p;
            }
        }
        return 0;
    }

    /**
     * Reads an image from the given URL string.
     *
     * @param url URL of the image
     * @return the decoded image, or {@code null} when reading fails
     */
    private BufferedImage getBI(String url) {
        try {
            return ImageIO.read(new URL(url));
        } catch (IOException ex) {
            ex.printStackTrace();
            return null;
        }
    }

    /**
     * Extracts one digit cell of the current image as a 0/1 array.
     *
     * @param index zero-based cell number, counted from the left
     * @return row-major array of 208 values: 0 for a white pixel, 1 otherwise
     */
    private int[] getData(int index) {
        BufferedImage sub = bi.getSubimage(index * CELL_WIDTH, 0, CELL_WIDTH, CELL_HEIGHT);
        int iw = sub.getWidth();
        int ih = sub.getHeight();
        int[] demo = new int[iw * ih];
        for (int i = 0; i < ih; i++) {
            for (int j = 0; j < iw; j++) {
                demo[i * iw + j] = (sub.getRGB(j, i) == WHITE ? 0 : 1);
            }
        }
        return demo;
    }

    /**
     * Computes the smallest difference between a cell and the five templates
     * of one digit.
     *
     * @param who  digit (0-9) whose templates are compared
     * @param demo 0/1 cell data as produced by {@link #getData(int)}
     * @return the lowest count of differing pixels over the five positions,
     *         capped at 208
     */
    private int getMin(int who, int[] demo) {
        int temp = CELL_PIXELS;
        for (int i = 0; i < POSITION_NAMES.length; i++) {
            int x = 0;
            for (int j = 0; j < demo.length; j++) {
                if (model[i][who][j] != demo[j]) {
                    x++;
                }
            }
            if (x < temp) {
                temp = x;
            }
        }
        return temp;
    }

    /**
     * Tells 6, 8, 9 and 0 apart by probing fixed pixels of the cell, since
     * these digits are easily confused by plain template matching.
     *
     * <p>Indices are row-major in the 16-pixel-wide cell, i.e.
     * {@code index = row * 16 + column}.
     *
     * @param demo   0/1 cell data as produced by {@link #getData(int)}
     * @param origin digit chosen by template matching; returned unchanged when
     *               neither probe finds ink
     * @return 0, 6, 8, 9 or {@code origin}
     */
    private int get689(int[] demo, int origin) {
        // Vertical or diagonal runs of ink in rows 1-5, columns 10-12 (upper right).
        boolean isRight =
                (demo[75] == 1 && demo[90] == 1) || (demo[76] == 1 && demo[91] == 1)
                || (demo[58] == 1 && demo[74] == 1 && demo[90] == 1)
                || (demo[59] == 1 && demo[75] == 1 && demo[91] == 1)
                || (demo[60] == 1 && demo[76] == 1 && demo[92] == 1)
                || (demo[28] == 1 && demo[44] == 1 && demo[60] == 1)
                || (demo[27] == 1 && demo[43] == 1 && demo[59] == 1);
        // Vertical runs of ink in rows 8-9, columns 3-5 (lower left).
        boolean isLeft =
                (demo[131] == 1 && demo[147] == 1) || (demo[132] == 1 && demo[148] == 1)
                || (demo[133] == 1 && demo[149] == 1);

        int temp;
        if (isLeft && isRight) {
            temp = 8;
        } else if (isLeft) {
            temp = 6;
        } else if (isRight) {
            temp = 9;
        } else {
            temp = origin;
        }

        if (temp == 8) {
            // An 8 needs ink in the middle: rows 5 and 6, columns 7-10.
            boolean hasMiddleInk =
                    (demo[103] == 1 && demo[104] == 1 && demo[105] == 1 && demo[106] == 1)
                    || (demo[87] == 1 && demo[88] == 1 && demo[89] == 1 && demo[90] == 1)
                    || (demo[103] + demo[104] + demo[105] + demo[106] + demo[87] + demo[88]
                        + demo[89] + demo[90] > 3);
            if (!hasMiddleInk) {
                return 0;
            }
        }
        return temp;
    }

    /**
     * Finds the digit whose templates are closest to the given cell.
     *
     * @param demo 0/1 cell data as produced by {@link #getData(int)}
     * @return the best matching digit (the lowest digit wins ties), refined by
     *         {@link #get689(int[], int)} when it is 6, 8 or 9; -1 when no
     *         digit differs in fewer than 208 pixels
     */
    private int doCheck(int[] demo) {
        int number = -1;
        int temp = CELL_PIXELS;
        for (int i = 0; i < 10; i++) {
            int x = getMin(i, demo);
            if (x < temp) {
                temp = x;
                number = i;
            }
        }
        if (number == 6 || number == 8 || number == 9) {
            number = get689(demo, number);
        }
        return number;
    }
}

下面是一些字模的内容,把它保存成相应的文件,并能让程序找到就可以了.

比如这是0的字模,其中包含了它在不同位置(居中、偏左、偏右、偏上、偏下)的字模,其他数字以此类推。这些字模都是先取到样本,然后再分类的。

#center
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
#left
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,0,
0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,0,
#right
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,
0,0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,
#up
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
#down
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,

在此算法的实现中主要是针对比较规范的验证码,然后还要针对外形比较相似的6890进行分辨,实现识别的方式有很多种,大家仁者见仁,智者见智吧.不过,说句题外话,MOTO的识别就很牛,它对手写字体的支持都能达到很高的识别率,更不要说是正体了,这就是另外一个领域了.不是一两句代码就能搞得定的:)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值