Python验证码识别

#!/usr/bin/env python
# coding=utf-8

# Path to the Tesseract OCR executable; run_tesseract() uses it as the
# program to launch. Adjust it to match the local installation.
tesseract_cmd = 'D:\\Tesseract-OCR\\tesseract'

try:
    from PIL import Image
    import pytesseract
    import urllib2
    import urllib
    import cookielib
    import re
    import subprocess
    import sys
    import tempfile
    import os
    import shlex
    import json
except ImportError:
    print '模块导入错误,请使用pip安装'
    raise SystemExit

def mkdir(path):
    """Create the directory *path* if it is missing and return the cleaned path.

    Surrounding whitespace and any trailing backslashes are stripped from
    *path* before it is checked, created and returned.
    """
    target = path.strip().rstrip("\\")
    if os.path.exists(target):
        return target
    os.makedirs(target)
    return target

def get_yundapic():
    """Download one image from the Yunda endpoint together with its cookies.

    A cookie-aware opener is built and installed as the process-wide urllib2
    default, so later ``urllib2.urlopen`` calls share the same cookie jar.

    Returns:
        A ``(cookie_jar, image_bytes)`` tuple: the ``cookielib.CookieJar``
        filled by the request and the raw response body.
    """
    cj = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    # addheaders is expected to be a list of (name, value) tuples, not a set.
    opener.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0')]
    urllib2.install_opener(opener)

    picUrl = 'http://ykjcx.yundasys.com/zb1qBpg2.php'
    response = opener.open(urllib2.Request(picUrl))
    try:
        data = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()
    return (cj, data)

def save_pic(input_file_name, data):
    """Write the raw bytes *data* to the file *input_file_name*.

    Nothing is written (and no file is created) when *data* is None.
    """
    if data is None:
        return
    # The context manager closes the handle even when write() raises.
    with open(input_file_name, "wb") as out:
        out.write(data)

def tempnam():
    """Return a fresh temporary file path whose basename starts with ``tess_``.

    The temporary file itself is removed before the function returns; only
    its name is handed back for the caller to build file names from.
    NOTE: another process could claim the same name before the caller
    creates its file, so this is only suitable for non-adversarial use.
    """
    # Close (and thereby delete) the file deterministically instead of
    # relying on garbage collection to do it at some later point.
    with tempfile.NamedTemporaryFile(prefix="tess_") as tmpfile:
        return tmpfile.name

def cleanup(filename):
    """Delete *filename*, silently ignoring any OSError (e.g. file missing)."""
    try:
        os.unlink(filename)
    except OSError:
        return

def run_tesseract(input_filename, output_filename_base):
    """Run the Tesseract executable on *input_filename*.

    Args:
        input_filename: path of the image file passed to tesseract.
        output_filename_base: output path passed to tesseract; the caller in
            this file reads the result from this base plus ``.txt``.

    Returns:
        A ``(exit_status, stderr_output)`` tuple.
    """
    command = [tesseract_cmd, input_filename, output_filename_base]
    # An argument list must not be combined with shell=True: on POSIX only the
    # first item would be executed and the file names would be dropped.
    proc = subprocess.Popen(command, stderr=subprocess.PIPE)
    # communicate() drains stderr while waiting; wait() followed by read()
    # can deadlock once the child fills the pipe buffer.
    _, error_output = proc.communicate()
    return (proc.returncode, error_output)

def image_to_string(data):
    """Recognise the text in raw image bytes with Tesseract.

    The bytes are written to a temporary ``.png`` file, tesseract is run on
    it, and the text it wrote to the matching ``.txt`` file is returned with
    surrounding whitespace stripped. Both temporary files are removed
    afterwards, whether or not recognition succeeded.

    Args:
        data: raw image bytes (as returned by get_yundapic()).

    Returns:
        The recognised text, stripped.

    Raises:
        RuntimeError: if tesseract exits with a non-zero status; the message
            carries the status and tesseract's stderr output.
    """
    output_file_name_base = tempnam()
    input_file_name = '%s.png' % tempnam()
    output_file_name = '%s.txt' % output_file_name_base

    try:
        save_pic(input_file_name, data)
        status, error_string = run_tesseract(input_file_name, output_file_name_base)
        if status:
            # A bare ``raise`` here had no active exception to re-raise and
            # threw away tesseract's diagnostics; report them explicitly.
            raise RuntimeError(
                'tesseract exited with status %s: %s' % (status, error_string))
        with open(output_file_name) as result_file:
            return result_file.read().strip()
    finally:
        cleanup(input_file_name)
        cleanup(output_file_name)

def save_data(cookies, vcode):
    """POST the PHPSESSID cookie value and the solved code as JSON.

    Args:
        cookies: iterable of cookie objects exposing ``name`` and ``value``
            (e.g. a ``cookielib.CookieJar``).
        vcode: the value sent under the ``code`` key.

    Returns:
        The response body for the first cookie named ``PHPSESSID``, or None
        when no such cookie is present (no request is made in that case).
    """
    for ck in cookies:
        if ck.name != 'PHPSESSID':
            continue
        # Placeholder from the original post: replace with the real endpoint
        # that stores the cookie and the code before running.
        saveUrl = '保存cookie和验证码地址'
        values = {'cookie': ck.value, 'websiteid': 1, 'code': vcode}
        post_data = json.dumps(values)
        response = urllib2.urlopen(urllib2.Request(saveUrl, post_data))
        try:
            return response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()
    return None

def main():
    """Endlessly fetch an image, OCR it, evaluate the text and submit it.

    Each iteration downloads a fresh image and cookie jar, recognises the
    text, and only proceeds when exactly three characters were read. Text
    made up solely of letters is reported as a recognition error; anything
    else is evaluated as an expression, printed, and posted via save_data().
    The loop never terminates on its own; interrupt it with Ctrl+C.
    """
    # 'A-Z' replaces the original 'A-z' range, which also matched the
    # punctuation characters that sit between 'Z' and 'a' in ASCII.
    letters_only = re.compile(r'[a-zA-Z]+$')

    while True:
        cj, input_file_stream = get_yundapic()
        vcode = image_to_string(input_file_stream)
        if len(vcode) != 3:
            continue
        if letters_only.match(vcode):
            print('識別錯誤')
            continue
        try:
            # SECURITY(review): eval() executes text OCR'd from a remote
            # image. It is limited to three characters here, but should be
            # replaced by an explicit arithmetic parser.
            num = eval(vcode)
        except Exception:
            # The OCR output was not a valid expression; try a new image.
            continue
        print(num)
        try:
            print(save_data(cj, num))
        except Exception as exc:
            # Keep looping on failures, but do not hide them and, unlike a
            # bare ``except``, let KeyboardInterrupt stop the program.
            sys.stderr.write('save_data failed: %s\n' % (exc,))
# Start the fetch/recognise/submit loop only when run as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()

我比较喜欢简单粗暴地直接贴代码,不多解释,请自行阅读代码;复制后即可运行(需先安装 Tesseract,并把 saveUrl 换成实际的保存地址)。

转载于:https://www.cnblogs.com/zwdo/p/5697256.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值