from ddddocr import DdddOcr
import requests
from urllib.parse import urlparse
import urllib3
# Silence urllib3's InsecureRequestWarning for the whole process: the image
# download in this file is made with verify=False, which triggers that warning.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
class CaptchaRecognizer:
    """Recognize captcha images loaded from a local file or a URL with ddddocr.

    All public methods are best-effort: failures are reported with ``print``
    and signalled to the caller by a ``None`` (or ``False``) return value
    instead of an exception.
    """

    def __init__(self):
        """Create the OCR engine (``DdddOcr`` constructed with ``show_ad=False``)."""
        self.ocr = DdddOcr(show_ad=False)

    def is_url(self, path):
        """Return True if *path* parses as a URL with a scheme and a netloc.

        Args:
            path: Candidate string; anything that cannot be parsed is
                treated as "not a URL".

        Returns:
            bool: True when ``urlparse`` finds both a scheme and a network
            location, False otherwise (including for unparsable input).
        """
        try:
            parsed = urlparse(path)
        except (ValueError, AttributeError, TypeError):
            # ValueError: malformed URL (e.g. an invalid bracketed host);
            # AttributeError/TypeError: input that is not str/bytes.
            # Narrower than a bare ``except`` so Ctrl-C and SystemExit
            # still propagate.
            return False
        return bool(parsed.scheme and parsed.netloc)

    def get_image_content(self, path, timeout=10):
        """Return the raw bytes of an image given a local path or a URL.

        Args:
            path: Local file path or URL of the image.
            timeout: Timeout in seconds for the network request; unused for
                local files.

        Returns:
            bytes | None: The image content, or None when the download or
            the file read fails (the reason is printed).
        """
        try:
            if not self.is_url(path):
                # Not a URL: treat it as a local file path.
                with open(path, 'rb') as image_file:
                    return image_file.read()

            # Download the remote image.
            # NOTE(review): verify=False disables TLS certificate checks and
            # is kept only to preserve existing behavior; the download is
            # therefore open to man-in-the-middle tampering.
            response = requests.get(
                path,
                headers={
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                },
                verify=False,
                timeout=timeout,
            )
            if response.status_code != 200:
                # Report the failure directly instead of raising a generic
                # Exception only to catch it below; the printed text is the
                # same as before.
                print(f"获取图片失败: 下载图片失败: HTTP {response.status_code}")
                return None
            return response.content
        except Exception as e:
            # Deliberate best-effort boundary: any I/O or network error is
            # reported and turned into a None result.
            print(f"获取图片失败: {str(e)}")
            return None

    def recognize(self, image_path, timeout=10):
        """Recognize the text in a captcha image.

        Args:
            image_path: Image file path or URL.
            timeout: Timeout in seconds for the network request.

        Returns:
            str | None: The recognition result, or None when the image could
            not be loaded (or was empty) or the OCR call failed.
        """
        # get_image_content reports its own errors and returns None on
        # failure, so only the OCR call needs guarding here.
        image_bytes = self.get_image_content(image_path, timeout)
        if not image_bytes:
            return None
        try:
            return self.ocr.classification(image_bytes)
        except Exception as e:
            print(f"识别失败: {str(e)}")
            return None
def recognize_captcha(image_path, timeout=10):
    """Convenience wrapper: recognize a captcha in a single call.

    A new CaptchaRecognizer is constructed on every call.

    Args:
        image_path: Image file path or URL.
        timeout: Timeout in seconds for the network request.

    Returns:
        str: The recognition result, or None on failure.
    """
    return CaptchaRecognizer().recognize(image_path, timeout)
# Usage example
if __name__ == "__main__":
    # Option 1: go through the module-level convenience function.
    via_function = recognize_captcha("http://**.*.*/captcha.jpg")
    print(f"识别结果1: {via_function}")

    # Option 2: build a recognizer explicitly and call it.
    via_class = CaptchaRecognizer().recognize("http://*.*.*.*/captcha.jpg")
    print(f"识别结果2: {via_class}")
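# NOTE(review): the lines below were stray non-code text at the end of the
# file (possibly page-scrape residue; meaning unknown). They are kept as
# comments because "05-11" is not valid Python 3 and broke parsing.
# 05-11
# 2111
#
# 09-16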