基于OpenCV的图像拼接与文档检测:从特征提取到透视变换
在计算机视觉领域,图像拼接和文档检测是两个非常实用的应用场景。图像拼接可以将多张图像组合成一张更大的图像,从而提供更广阔的视野;而文档检测则可以自动识别图像中的文档轮廓,并将其矫正为标准的矩形图像。本文将详细介绍如何使用OpenCV实现这两个功能,包括特征点提取、匹配、透视变换以及文档检测等关键步骤。
一、背景介绍
(一)图像拼接
图像拼接的核心在于找到两张图像之间的对应关系,然后通过几何变换将它们对齐。SIFT(Scale-Invariant Feature Transform)算法是一种经典的特征点提取方法,能够检测出图像中的关键点并计算其描述符,这些描述符对光照、尺度和旋转具有一定的不变性,非常适合用于图像拼接任务。
(二)文档检测
文档检测的目标是从图像中识别出文档的轮廓,并将其矫正为标准的矩形图像。这通常需要通过边缘检测、轮廓提取和透视变换来实现。OpenCV提供了强大的工具来完成这些任务。
二、图像拼接实现
(一)特征点提取与描述符计算
首先,我们需要从输入的两张图像中提取特征点并计算其描述符。以下是代码实现:
def detectAndDescribe(image):
    """Detect SIFT keypoints in a BGR image and compute their descriptors.

    Returns a tuple ``(kps, kps_float, des)``: the keypoint objects, the
    keypoint coordinates as a float32 array, and the SIFT descriptors.
    """
    # Convert the colour image to grayscale before running the detector.
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    sift = cv2.SIFT_create()
    # No mask is supplied, so the whole image is searched.
    keypoints, descriptors = sift.detectAndCompute(grayscale, None)
    # Pull out the coordinates of every keypoint as float32 values.
    coords = np.float32([keypoint.pt for keypoint in keypoints])
    return (keypoints, coords, descriptors)
(二)特征点匹配
接下来,我们使用暴力匹配器(BFMatcher)来匹配两张图像的特征点。为了提高匹配的准确性,我们采用KNN匹配(k=2),为图像B的每个描述符在图像A中找出距离最近的两个候选,并通过距离比值测试(最近邻距离小于次近邻距离的0.65倍)来筛选出可靠的匹配对:
# Brute-force matcher: for every descriptor of image B (query) find its two
# nearest descriptors in image A (train).
matcher = cv2.BFMatcher()
rawMatches = matcher.knnMatch(desB, desA, 2)
# Ratio test: keep a pair only when the best candidate is clearly closer
# than the runner-up (distance below 0.65 of the second-best distance).
good = [
    pair
    for pair in rawMatches
    if len(pair) == 2 and pair[0].distance < 0.65 * pair[1].distance
]
# (index into image B keypoints, index into image A keypoints) per kept pair.
matches = [(pair[0].queryIdx, pair[0].trainIdx) for pair in good]
(三)透视变换与图像拼接
当匹配对的数量大于4时,我们可以使用cv2.findHomography函数计算透视变换矩阵(求解单应性矩阵至少需要4组对应点)。该函数支持多种方法,包括RANSAC(随机抽样一致性算法),它能够有效排除异常点的影响,从而提高变换矩阵的鲁棒性;代码中的最后一个参数10是RANSAC判定内点时允许的最大重投影误差(单位为像素)。以下是代码实现:
# The homography is only estimated when more than 4 reliable matches exist.
if len(matches) > 4:
    # Matched coordinates: first index of each pair refers to image B,
    # second index to image A.
    ptsB = np.float32([kps_floatB[i] for (i, _) in matches])
    ptsA = np.float32([kps_floatA[i] for (_, i) in matches])
    # RANSAC with a reprojection threshold of 10; H maps image B onto image A.
    (H, mask) = cv2.findHomography(ptsB, ptsA, cv2.RANSAC, 10)
    heightA, widthA = imageA.shape[:2]
    heightB, widthB = imageB.shape[:2]
    # The canvas must be tall enough for both images; using only image B's
    # height makes the paste below fail when image A is the taller one.
    canvasHeight = max(heightA, heightB)
    result = cv2.warpPerspective(imageB, H, (widthA + widthB, canvasHeight))
    # Image A is the reference frame, so it goes unchanged in the top-left.
    result[0:heightA, 0:widthA] = imageA
(四)运行结果
三、文档检测实现
(一)边缘检测与轮廓提取
文档检测的第一步是通过边缘检测提取图像中的轮廓。我们使用Canny边缘检测算法来实现这一目标:
# Grayscale conversion followed by a 5x5 Gaussian blur, done in one step;
# `gray` ends up holding the blurred grayscale image.
gray = cv2.GaussianBlur(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), (5, 5), 0)
# Canny edge detection with thresholds 75 and 200.
edged = cv2.Canny(gray, 75, 200)
接下来,我们使用cv2.findContours函数提取图像中的轮廓,并按照面积从大到小排序,只保留面积最大的3个轮廓:
# Extract the outer contours from the edge map, then keep the three with the
# largest area. Element [-2] of the findContours result is used as the
# contour list.
cnts = sorted(
    cv2.findContours(edged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2],
    key=cv2.contourArea,
    reverse=True,
)[:3]
(二)轮廓近似与文档矫正
我们通过轮廓近似算法找到文档的四个角点,并使用透视变换将其矫正为标准的矩形图像:
# Start from "no document found" so that later code can test for None instead
# of hitting a NameError when no contour qualifies.
screenCnt = None
for c in cnts:
    peri = cv2.arcLength(c, True)  # perimeter of the closed contour
    # Approximate the contour with a polygon; tolerance is 5% of the perimeter.
    approx = cv2.approxPolyDP(c, 0.05 * peri, True)
    area = cv2.contourArea(approx)
    # Accept the first sufficiently large polygon with exactly four corners.
    if area > 20000 and len(approx) == 4:
        screenCnt = approx
        break
透视变换的实现如下:
def four_point_transform(image, pts):
    """Warp the quadrilateral given by ``pts`` in ``image`` to an upright rectangle.

    ``pts`` is passed to ``order_points``; its result is unpacked as
    top-left, top-right, bottom-right, bottom-left. The output size is the
    longer of each pair of opposite edges, truncated to an integer.
    """

    def edge_length(p, q):
        # Euclidean distance between two corner points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Output width: the longer of the bottom and top edges.
    out_width = max(
        int(edge_length(bottom_right, bottom_left)),
        int(edge_length(top_right, top_left)),
    )
    # Output height: the longer of the right and left edges.
    out_height = max(
        int(edge_length(top_right, bottom_right)),
        int(edge_length(top_left, bottom_left)),
    )

    # Destination corners, listed in the same order as `corners`.
    target = np.array(
        [
            [0, 0],
            [out_width - 1, 0],
            [out_width - 1, out_height - 1],
            [0, out_height - 1],
        ],
        dtype="float32",
    )
    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (out_width, out_height))
四、完整代码
以下是完整的代码实现:
(一)图像拼接
import cv2
import numpy as np
import sys
def cv_show(name, img):
    """Show ``img`` in a window titled ``name`` and wait for a key press."""
    cv2.imshow(name, img)
    # A delay of 0 is passed to waitKey, so the call waits for a key event.
    cv2.waitKey(0)
def detectAndDescribe(image):
    """Detect SIFT keypoints in a BGR image and compute their descriptors.

    Returns a tuple ``(kps, kps_float, des)``: the keypoint objects, the
    keypoint coordinates as a float32 array, and the SIFT descriptors.
    """
    # Convert the colour image to grayscale before running the detector.
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    sift = cv2.SIFT_create()
    # No mask is supplied, so the whole image is searched.
    keypoints, descriptors = sift.detectAndCompute(grayscale, None)
    # Pull out the coordinates of every keypoint as float32 values.
    coords = np.float32([keypoint.pt for keypoint in keypoints])
    return (keypoints, coords, descriptors)
# --- Load the two input images -------------------------------------------
# cv2.imread returns None instead of raising when a file cannot be read, so
# check explicitly before the images are used.
imageA = cv2.imread("1.jpg")
if imageA is None:
    print('无法读取图片 1.jpg')
    sys.exit()
cv_show('imageA', imageA)
imageB = cv2.imread("2.jpg")
if imageB is None:
    print('无法读取图片 2.jpg')
    sys.exit()
cv_show('imageB', imageB)

# --- Feature extraction ----------------------------------------------------
(kpsA, kps_floatA, desA) = detectAndDescribe(imageA)
(kpsB, kps_floatB, desB) = detectAndDescribe(imageB)
# No descriptors means there is nothing to match; stop before knnMatch is
# called with a None argument.
if desA is None or desB is None:
    print('图片中未检测到特征点')
    sys.exit()

# --- KNN matching with a ratio test ----------------------------------------
# Image B is the query set and image A the train set.
matcher = cv2.BFMatcher()
rawMatches = matcher.knnMatch(desB, desA, 2)
good = []
matches = []
for m in rawMatches:
    # Keep a pair only when the best match is clearly better than the second.
    if len(m) == 2 and m[0].distance < 0.65 * m[1].distance:
        good.append(m)
        matches.append((m[0].queryIdx, m[0].trainIdx))
print(len(good))
print(matches)
vis = cv2.drawMatchesKnn(imageB, kpsB, imageA, kpsA, good, None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
cv_show("Keypoint Matches", vis)

# --- Homography estimation and stitching -----------------------------------
if len(matches) > 4:
    ptsB = np.float32([kps_floatB[i] for (i, _) in matches])
    ptsA = np.float32([kps_floatA[i] for (_, i) in matches])
    # RANSAC with a reprojection threshold of 10; H maps image B onto image A.
    (H, mask) = cv2.findHomography(ptsB, ptsA, cv2.RANSAC, 10)
    if H is None:
        # The estimation can fail even with enough matches.
        print('无法根据匹配点计算透视变换矩阵')
        sys.exit()
    heightA, widthA = imageA.shape[:2]
    heightB, widthB = imageB.shape[:2]
    # The canvas must be tall enough for both images; using only image B's
    # height makes the paste below fail when image A is the taller one.
    canvasHeight = max(heightA, heightB)
    result = cv2.warpPerspective(imageB, H, (widthA + widthB, canvasHeight))
    # Image A is the reference frame, so it goes unchanged in the top-left.
    result[0:heightA, 0:widthA] = imageA
    cv_show('result', result)
else:
    print('图片未找到4个以上的匹配点')
    sys.exit()
(二)文档检测
import cv2
import numpy as np
def cv_show(name, img):
    """Show ``img`` in a window titled ``name`` and wait for a key press."""
    cv2.imshow(name, img)
    # A delay of 0 is passed to waitKey, so the call waits for a key event.
    cv2.waitKey(0)
def order_points(pts):
    """Arrange four (x, y) points as top-left, top-right, bottom-right, bottom-left.

    The corner with the smallest x + y is taken as top-left and the one with
    the largest x + y as bottom-right; the smallest y - x gives top-right and
    the largest y - x gives bottom-left. Returns a (4, 2) float32 array.
    """
    coord_sum = pts.sum(axis=1)
    # np.diff along axis 1 yields y - x for every point.
    coord_diff = np.diff(pts, axis=1)

    ordered = np.zeros((4, 2), dtype="float32")
    ordered[0] = pts[np.argmin(coord_sum)]   # top-left
    ordered[1] = pts[np.argmin(coord_diff)]  # top-right
    ordered[2] = pts[np.argmax(coord_sum)]   # bottom-right
    ordered[3] = pts[np.argmax(coord_diff)]  # bottom-left
    return ordered
def four_point_transform(image, pts):
    """Warp the quadrilateral given by ``pts`` in ``image`` to an upright rectangle.

    ``pts`` is ordered with ``order_points`` (top-left, top-right,
    bottom-right, bottom-left). The output size is the longer of each pair of
    opposite edges, truncated to an integer.
    """

    def edge_length(p, q):
        # Euclidean distance between two corner points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Output width: the longer of the bottom and top edges.
    out_width = max(
        int(edge_length(bottom_right, bottom_left)),
        int(edge_length(top_right, top_left)),
    )
    # Output height: the longer of the right and left edges.
    out_height = max(
        int(edge_length(top_right, bottom_right)),
        int(edge_length(top_left, bottom_left)),
    )

    # Destination corners, listed in the same order as `corners`.
    target = np.array(
        [
            [0, 0],
            [out_width - 1, 0],
            [out_width - 1, out_height - 1],
            [0, out_height - 1],
        ],
        dtype="float32",
    )
    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (out_width, out_height))
# Open capture device 0.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Cannot open camera")
    # Same exit status as the bare exit() it replaces, without relying on the
    # site-provided exit helper.
    raise SystemExit

try:
    while True:
        ret, image = cap.read()
        # Check the read result before touching the frame: on failure the
        # frame is not usable and image.copy() would raise.
        if not ret:
            print("不能读取摄像头")
            break
        # Untouched copy for the final warp; `image` gets contours drawn on it.
        orig = image.copy()
        cv_show("image", image)

        # Edge map: grayscale -> 5x5 Gaussian blur -> Canny.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (5, 5), 0)
        edged = cv2.Canny(gray, 75, 200)
        cv_show('1', edged)

        # Keep the three outer contours with the largest area.
        cnts = cv2.findContours(edged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
        cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:3]
        image_contours = cv2.drawContours(image, cnts, -1, (0, 255, 0), 2)
        cv_show("image_contours", image_contours)

        # Look for the first large polygon with exactly four corners.
        # None means no document was found in this frame.
        screenCnt = None
        for c in cnts:
            peri = cv2.arcLength(c, True)
            approx = cv2.approxPolyDP(c, 0.05 * peri, True)
            area = cv2.contourArea(approx)
            if area > 20000 and len(approx) == 4:
                screenCnt = approx
                print(peri, area)
                print('检测到文档')
                break

        if screenCnt is not None:
            image_contours = cv2.drawContours(image, [screenCnt], 0, (0, 255, 0), 2)
            cv_show("image", image_contours)
            # Rectify the document from the clean copy of the frame.
            warped = four_point_transform(orig, screenCnt.reshape(4, 2))
            cv_show("warped", warped)
            # Binarise the rectified page with Otsu's threshold.
            warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
            ref = cv2.threshold(warped, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
            cv_show("ref", ref)
finally:
    # Release the camera and close the windows even when the loop is left via
    # an exception or Ctrl-C.
    cap.release()
    cv2.destroyAllWindows()
五、总结
通过上述步骤,我们成功实现了基于OpenCV的图像拼接和文档检测功能。SIFT算法能够提取出具有鲁棒性的特征点,而RANSAC方法则确保了透视变换矩阵的可靠性。在文档检测中,边缘检测和轮廓提取是关键步骤,而透视变换则可以将文档矫正为标准的矩形图像。这些技术在实际应用中具有广泛的价值,例如在图像处理、文档扫描和机器人视觉等领域。