OCR-Python-Opencv一种基于坐标投影的图片旋转矫正方法

最新推荐文章于 2025-10-09 16:45:30 发布

原创最新推荐文章于 2025-10-09 16:45:30 发布 · 3.9k 阅读

40 ·

CC 4.0 BY-SA版权

文章标签：

#Opencv #Ocr #文本矫正

本文介绍了使用Opencv进行OCR前的图像预处理，特别是针对旋转角度在-35~35度的图片，提出了一种基于坐标投影的矫正算法。该方法通过灰度化、二值化后，对图像进行角度遍历并计算非零像素行数，找到最小投影的旋转角度以实现矫正。虽然有一定的抗干扰能力，但不适用于全包围的图片。附带了完整的Python代码实现。

由于最近公司业务需要使用OCR功能，所以我不得不研究Opencv这个图像库，进行OCR功能前期的图像处理工作。二值化、灰度化什么的都非常简单，这里就不过多讲述。下面记录我自己受到启发后实现的一种基于坐标投影的图片矫正算法，其实思路非常简单，大神勿喷，在此做一个小分享，也算是我自己对该方面知识的一个总结和整理。

ps：该算法适合旋转角度不大（-35°~35°）的图片矫正，具备一定抗干扰能力（当然也不是万能的，这个算法可以当做其它矫正算法的补充）。

首先来看一下效果

左边是原图，右边是矫正后的图

这张图为了验证其抗干扰能力，我刻意加了一些乱七八糟的东西进行干扰，效果看起来似乎还是不错的。

这张似乎还有一点点倾斜，看起来还不是很正，不过相对于原来的字来说已经正很多了。

原理

其实这个原理非常简单，就是把一张图经过“灰度化”“二值化”“自动阈值”处理后，从-35°到35°进行旋转遍历，把每张旋转后的图像投影到x或者y坐标轴上（我这里是投影到y坐标轴上），然后统计其在该坐标轴上的非零像素行数，非零像素行数最少的旋转角度就是该图片需要旋转的角度。下面是一张非常直截了当的示意图：在Y坐标轴上像素投影行数最少的角度就是其旋转角度。

弊端：

这个方式可以矫正大部分情况下的图片，但是对于一些全包围的图片可能无法进行矫正，并非万能的，仅供参考。例如下面的图就矫正不了，因为无论怎么旋转，其投影都是一样的。

代码：

好了，废话了这么多，直接贴代码咯，有兴趣的可以研究研究，一起学习，欢迎在下面评论。下面是一整个.py文件，拷贝过去就可以了，运行时如果提示缺少某个库，请自行安装。

基于投影的倾斜角计算.py

"""
一种基于投影的文本图片校正算法，具备高抗干扰，高准确的特点
author:小虫之家
"""

import cv2
import numpy as np
import math
import imutils

def strt():
    """Preview the grayscale and Canny-edge versions of ``images/ttt.jpg``.

    Scratch/demo routine: loads the image, shrinks it to one third of its
    size, shows the grayscale image and its Canny edges in two windows, and
    computes an adaptive-threshold binarisation of the inverted grayscale
    image. The binarised result is neither displayed nor returned.

    Raises:
        FileNotFoundError: if the image file cannot be read.
    """
    imagePath = 'images/ttt.jpg'
    # Load as a 3-channel BGR image. Reading with flag 0 (grayscale) would
    # make the BGR2GRAY conversion below fail on a single-channel input.
    image = cv2.imread(imagePath, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: " + imagePath)
    height, width = image.shape[0:2]
    # cv2.resize expects the target size as (width, height).
    image = cv2.resize(image, (int(width / 3), int(height / 3)))
    # Grayscale conversion
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    cv2.imshow("hui du hua：", gray)  # show the grayscale image
    edges = cv2.Canny(gray, 50, 120)
    cv2.imshow("edges ", edges)
    # Binarisation of the inverted grayscale image; the result is not used
    # any further by this routine.
    binary = cv2.adaptiveThreshold(~gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 35, 0)


## Image rotation
def rotate_bound(image, angle):
    """Rotate *image* through ``imutils.rotate`` with the angle's sign flipped.

    Args:
        image: image array to rotate.
        angle: rotation angle; it is negated before being handed to
            ``imutils.rotate``.

    Returns:
        The rotated image produced by ``imutils.rotate``.
    """
    negatedAngle = -angle
    rotated = imutils.rotate(image, negatedAngle)
    return rotated

def getImageRotate(image, maxAngle=35):
    """Estimate the skew angle of an image from its row projection.

    Steps:
      1. Shrink the image so that its longer side is at most 500 px
         (smaller images keep their size).
      2. Convert to grayscale, apply an adaptive Gaussian threshold and
         invert the result (presumably so that dark text on a light
         background becomes the non-zero foreground).
      3. Blend the binarised image into the centre of a black square canvas
         whose side is sqrt(2) times the longer side.
      4. Rotate the canvas with ``rotate_bound`` through every integer angle
         in ``[-maxAngle, maxAngle]`` and count the pixel rows that contain
         at least one non-zero value.

    The angle with the fewest non-empty rows wins; on ties the first angle
    tried (the smallest one) is kept.

    Args:
        image: image array as returned by ``cv2.imread``; either a colour
            image or an already single-channel (2-D) grayscale image.
        maxAngle: largest absolute angle, in whole degrees, to try.
            Defaults to 35.

    Returns:
        The selected integer angle, intended to be passed to
        ``rotate_bound`` together with the original image. Returns 0 when
        no angle is tried (negative ``maxAngle``).

    Side effects:
        Prints diagnostic values to stdout and shows the prepared canvas in
        a window named "checkBaseImage".
    """
    imageHeight, imageWidth = image.shape[0:2]
    print(imageWidth, ", ", imageHeight)

    # Fit the longer side to toWidth. Images that are already small enough,
    # and square images, must still get a valid (non-zero) target size.
    toWidth = 500
    if imageWidth >= imageHeight and imageWidth > toWidth:
        templateImageWidth = toWidth
        templateImageHeight = toWidth / imageWidth * imageHeight
    elif imageHeight > imageWidth and imageHeight > toWidth:
        templateImageHeight = toWidth
        templateImageWidth = toWidth / imageHeight * imageWidth
    else:
        templateImageWidth = imageWidth
        templateImageHeight = imageHeight

    # Black square canvas with side = longer side * sqrt(2), i.e. the
    # diagonal of a square built on the longer side.
    longSide = max(templateImageWidth, templateImageHeight)
    lastImageWidth = int(math.sqrt(longSide * longSide * 2))
    templateImage = np.zeros((lastImageWidth, lastImageWidth, 3), np.uint8)
    print(templateImageWidth, ", ", templateImageHeight)

    # Clamp to at least 1 px so extreme aspect ratios cannot yield a 0 size.
    targetSize = (max(1, int(templateImageWidth)), max(1, int(templateImageHeight)))
    swapImage = cv2.resize(image, targetSize)

    # A 2-D array is already single-channel; converting it would raise.
    if swapImage.ndim == 3:
        grayImage = cv2.cvtColor(swapImage, cv2.COLOR_BGR2GRAY)
    else:
        grayImage = swapImage
    binaryImage = cv2.adaptiveThreshold(grayImage, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 15, 35)
    swapBinnaryImage = ~binaryImage

    # The canvas is square, so both centre coordinates are identical.
    center = (lastImageWidth // 2, lastImageWidth // 2)
    print(lastImageWidth, lastImageWidth)
    mask = np.full(swapBinnaryImage.shape, 255, swapBinnaryImage.dtype)
    checkBaseImage = cv2.seamlessClone(swapBinnaryImage, templateImage, mask, center, cv2.NORMAL_CLONE)
    cv2.imshow("checkBaseImage", checkBaseImage)

    minRotate = 0
    minCount = None
    # Never updated by the active search; printed below only so the console
    # output keeps its established shape.
    maxPixSum = -100
    angleLimit = int(maxAngle)
    # +1 so that the upper bound itself (e.g. +35 degrees) is also tested.
    for rotate in range(-angleLimit, angleLimit + 1):
        rotateImage = rotate_bound(checkBaseImage, rotate)
        # Flatten each row (all columns and channels) and count the rows
        # holding at least one non-zero value.
        rows = rotateImage.reshape(rotateImage.shape[0], -1)
        rowCount = int(np.count_nonzero(rows.any(axis=1)))
        if minCount is None or rowCount < minCount:
            minCount = rowCount
            minRotate = rotate
    print("over: rotate = ", minRotate)
    print("maxPixSum = ", maxPixSum)
    return minRotate

# Script entry point: estimate the skew of one image, rotate it by the
# estimated angle and show the image before and after.
if __name__=="__main__":
    imagePath = "images/lei.jpg"
    image = cv2.imread(imagePath)
    if image is None:
        # cv2.imread returns None (it does not raise) when the file is
        # missing or cannot be decoded; stop with a clear message instead of
        # failing later inside imshow.
        raise SystemExit("cannot read image: " + imagePath)
    cv2.imshow("normalImage", image)
    rotateAngle = getImageRotate(image)
    print("lastAngle = ", rotateAngle)
    image = rotate_bound(image, rotateAngle)
    cv2.imshow("rotateImage", image)
    # Block until a key is pressed, then close the preview windows.
    cv2.waitKey(0)
    cv2.destroyAllWindows()