由于需要对文档图像数据进行增强,要给完好的文档图像加上页角翻折效果,对文档进行做旧(折旧化)处理。
效果(左边为效果图,右边为原图):


思路
针对某个顶点,先确定一个以该顶点为角点的方形区域(矩阵),然后将折线一侧三角形(上三角)中的像素值赋值到折线另一侧(下三角)的对称位置,同时将上三角的像素置为白色(255)。其中最重要的是确定折线(对角线),以及一个点关于这条折线的对称点。
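关于直线对称点的公式:设直线的一般式方程为 $Ax + By + C = 0$,点 $(a, b)$ 关于该直线的对称点 $(x', y')$ 为
$$x' = -\frac{(A^2 - B^2)\,a + 2AB\,b + 2AC}{A^2 + B^2}, \qquad y' = -\frac{2AB\,a + (B^2 - A^2)\,b + 2BC}{A^2 + B^2}$$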
其中A, B, C为一般式方程的系数。通过这样就可以确定对称点的坐标。
代码如下:
import cv2
import numpy as np
import random
import json
def get_sym(a, b, coe):
    """Mirror the point (a, b) across the line A*x + B*y + C = 0.

    Args:
        a: x coordinate of the point to reflect.
        b: y coordinate of the point to reflect.
        coe: The line's general-form coefficients as a 3-tuple (A, B, C).

    Returns:
        A tuple ``(int(x), int(-y))`` where (x, y) is the mirror image of
        (a, b). The second component is negated before conversion, and
        ``int()`` truncates both components toward zero.
    """
    A, B, C = coe
    # Reflection formula reference:
    # https://zhidao.baidu.com/question/1691896311611343348.html
    norm = A ** 2 + B ** 2
    x_numerator = 2 * A * B * b + (A ** 2 - B ** 2) * a + 2 * A * C
    y_numerator = 2 * A * B * a + (B ** 2 - A ** 2) * b + 2 * B * C
    mirrored_x = -x_numerator / norm
    mirrored_y = -y_numerator / norm
    return int(mirrored_x), int(-mirrored_y)
def fold_top_left(img, mask, point):
    """Fold the top-left corner located at ``point`` over a random crease.

    A square of side ``pad`` (random, 50..500) is anchored with its top-left
    pixel at ``point``. Every pixel (i, j) of that square (i = row offset,
    j = column offset) with ``A*i - B*j + C < 0`` is copied to its mirror
    position across the crease (via ``get_sym``), then painted white in
    ``img`` and marked 255 in ``mask``.

    Compared with the original version this one:
      * clips the square to the image, so a corner close to the border (or a
        small image) no longer raises IndexError;
      * clamps the ``-1`` destination offset at 0 instead of letting an index
        of -1 wrap around to the opposite side of the patch;
      * paints with a scalar 255, so grayscale images work as well as
        3-channel ones;
      * drops the dead ``coe = (2, -1, -pad)`` assignment.

    Args:
        img: Image array indexed as ``img[row, col]``; modified in place.
        mask: 2-D array modified in place; assumed to have the same height
            and width as ``img``.
        point: ``(x, y)`` of the corner; ``point[0]`` is used as the column
            and ``point[1]`` as the row.

    Returns:
        The same ``(img, mask)`` objects that were passed in.
    """
    # Keep the original order of random draws: pad, then A, then B.
    pad = random.randint(50, 500)
    A = random.randint(1, 5)
    B = random.randint(-5, -1)
    C = -pad
    coe = (A, B, C)
    w, h = int(point[0]), int(point[1])
    height, width = img.shape[:2]

    # Local offsets restricted to the part of the square inside the image.
    for i in range(max(0, -h), min(pad, height - h)):
        for j in range(max(0, -w), min(pad, width - w)):
            if A * i - B * j + C >= 0:
                continue
            i_, j_ = get_sym(i, -j, coe)
            # The original shifted the destination by one pixel; clamp at 0
            # so the shift cannot turn into a wrap-around negative index.
            dst_row = h + max(i_ - 1, 0)
            dst_col = w + max(j_ - 1, 0)
            if 0 <= dst_row < height and 0 <= dst_col < width:
                img[dst_row, dst_col] = img[h + i, w + j]
            img[h + i, w + j] = 255
            mask[h + i, w + j] = 255
    return img, mask
def fold_top_right(img, mask, point):
    """Fold the top-right corner located at ``point`` over a random crease.

    A square of side ``pad`` (random, 50..500) is placed so that ``point`` is
    its top-right corner (rows ``h..h+pad``, columns ``w-pad..w``). The crease
    is the line ``A*i - B*j = 0`` through the square's top-left pixel, with
    random ``A`` in 1..10 and ``B`` in 1..5, re-drawn until ``B <= A``.
    Pixels with ``A*i - B*j < 0`` are copied to their mirror position (via
    ``get_sym``), painted white in ``img`` and marked 255 in ``mask``.

    Compared with the original version this one:
      * clips the square to the image; previously ``w - pad < 0`` produced a
        negative slice start that wrapped around or yielded an empty patch;
      * clamps the ``-1`` destination offset at 0 instead of wrapping;
      * paints with a scalar 255 so grayscale images are supported;
      * removes the dead ``coe = (5, 2, 0)`` assignment, the debug
        ``print(coe)`` and the unused full-size ``img_mask`` allocation.

    Args:
        img: Image array indexed as ``img[row, col]``; modified in place.
        mask: 2-D array modified in place; assumed to have the same height
            and width as ``img``.
        point: ``(x, y)`` of the corner; ``point[0]`` is used as the column
            and ``point[1]`` as the row.

    Returns:
        The same ``(img, mask)`` objects that were passed in.
    """
    # Keep the original order of random draws: A, B (retried), then pad.
    A = random.randint(1, 10)
    B = random.randint(1, 5)
    while B > A:
        B = random.randint(1, 5)
    C = 0
    coe = (A, B, C)
    w, h = int(point[0]), int(point[1])
    pad = random.randint(50, 500)
    height, width = img.shape[:2]

    # Global position of the square's top-left pixel (may lie off-image).
    row0, col0 = h, w - pad
    for i in range(max(0, -row0), min(pad, height - row0)):
        for j in range(max(0, -col0), min(pad, width - col0)):
            if A * i - B * j + C >= 0:
                continue
            i_, j_ = get_sym(i, -j, coe)
            # Clamp the one-pixel shift so it never becomes index -1.
            dst_row = row0 + max(i_ - 1, 0)
            dst_col = col0 + max(j_ - 1, 0)
            if 0 <= dst_row < height and 0 <= dst_col < width:
                img[dst_row, dst_col] = img[row0 + i, col0 + j]
            img[row0 + i, col0 + j] = 255
            mask[row0 + i, col0 + j] = 255
    return img, mask
def fold_bottom_right(img, mask, point):
    """Fold the bottom-right corner located at ``point`` along a 45° crease.

    A square of side ``pad`` (random, 50..500) is placed so that ``point`` is
    its bottom-right corner (rows ``h-pad..h``, columns ``w-pad..w``). Every
    pixel (i, j) of the square with ``i + j > pad`` is copied to
    ``(pad-1-j, pad-1-i)`` (its mirror across the anti-diagonal), painted
    white in ``img`` and marked 255 in ``mask``.

    Compared with the original version this one:
      * clips the square to the image; previously ``h - pad < 0`` or
        ``w - pad < 0`` produced a negative slice start and an IndexError or
        a wrapped-around patch;
      * paints with a scalar 255 so grayscale images are supported;
      * draws ``pad`` once (the original drew it twice and discarded the
        first value).

    Args:
        img: Image array indexed as ``img[row, col]``; modified in place.
        mask: 2-D array modified in place; assumed to have the same height
            and width as ``img``.
        point: ``(x, y)`` of the corner; ``point[0]`` is used as the column
            and ``point[1]`` as the row.

    Returns:
        The same ``(img, mask)`` objects that were passed in.
    """
    w, h = int(point[0]), int(point[1])
    pad = random.randint(50, 500)
    height, width = img.shape[:2]

    # Global position of the square's top-left pixel (may lie off-image).
    row0, col0 = h - pad, w - pad
    for i in range(max(0, -row0), min(pad, height - row0)):
        for j in range(max(0, -col0), min(pad, width - col0)):
            if i + j <= pad:
                continue
            dst_row = row0 + pad - 1 - j
            dst_col = col0 + pad - 1 - i
            # The mirror position can fall outside a clipped square.
            if 0 <= dst_row < height and 0 <= dst_col < width:
                img[dst_row, dst_col] = img[row0 + i, col0 + j]
            img[row0 + i, col0 + j] = 255
            mask[row0 + i, col0 + j] = 255
    return img, mask
def fold_bottom_left(img, mask, point):
    """Fold the bottom-left corner located at ``point`` over a random crease.

    A square of side ``pad`` (random, 50..500) is placed so that ``point`` is
    its bottom-left corner (rows ``h-pad..h``, columns ``w..w+pad``). The
    crease is the line ``A*i - B*j = 0`` through the square's top-left pixel,
    with random ``A`` in 1..5 and ``B`` in 1..10, re-drawn until ``B >= A``.
    Pixels with ``A*i - B*j > 0`` are copied to their mirror position (via
    ``get_sym``), painted white in ``img`` and marked 255 in ``mask``.

    Compared with the original version this one:
      * clips the square to the image; previously ``h - pad < 0`` produced a
        negative slice start and an IndexError or a wrapped-around patch;
      * skips mirror positions that fall outside the image instead of
        raising or wrapping;
      * paints with a scalar 255 so grayscale images are supported;
      * removes the debug ``print(pad)``.

    Args:
        img: Image array indexed as ``img[row, col]``; modified in place.
        mask: 2-D array modified in place; assumed to have the same height
            and width as ``img``.
        point: ``(x, y)`` of the corner; ``point[0]`` is used as the column
            and ``point[1]`` as the row.

    Returns:
        The same ``(img, mask)`` objects that were passed in.
    """
    # Keep the original order of random draws: A, B (retried), then pad.
    A = random.randint(1, 5)
    B = random.randint(1, 10)
    while B < A:
        B = random.randint(1, 10)
    C = 0
    coe = (A, B, C)
    w, h = int(point[0]), int(point[1])
    pad = random.randint(50, 500)
    height, width = img.shape[:2]

    # Global position of the square's top-left pixel (may lie off-image).
    row0, col0 = h - pad, w
    for i in range(max(0, -row0), min(pad, height - row0)):
        for j in range(max(0, -col0), min(pad, width - col0)):
            if A * i - B * j + C <= 0:
                continue
            i_, j_ = get_sym(i, -j, coe)
            dst_row = row0 + i_
            dst_col = col0 + j_
            if 0 <= dst_row < height and 0 <= dst_col < width:
                img[dst_row, dst_col] = img[row0 + i, col0 + j]
            img[row0 + i, col0 + j] = 255
            mask[row0 + i, col0 + j] = 255
    return img, mask
def generate_flip(img, mask, points):
    """Randomly fold each of the four document corners and save the result.

    Each corner is folded independently when a fresh ``random.randint(1, 100)``
    draw is even. The corners are processed in the order top-left, top-right,
    bottom-right, bottom-left. The final image is written to ``flipped.jpg``
    and the mask to ``mask.jpg``.

    Args:
        img: Image passed on to the ``fold_*`` functions.
        mask: Mask passed on to the ``fold_*`` functions.
        points: Sequence whose items 0..3 are the top-left, top-right,
            bottom-right and bottom-left corner points, in that order.

    Returns:
        The ``(img, mask)`` pair produced by the last fold that ran (or the
        inputs unchanged if no corner was folded).
    """
    top_left, top_right = points[0], points[1]
    bottom_right, bottom_left = points[2], points[3]
    corner_folds = (
        (fold_top_left, top_left),
        (fold_top_right, top_right),
        (fold_bottom_right, bottom_right),
        (fold_bottom_left, bottom_left),
    )
    # Flip a coin per corner; only even draws trigger the fold.
    for fold, corner in corner_folds:
        if random.randint(1, 100) % 2 != 0:
            continue
        img, mask = fold(img, mask, corner)
    cv2.imwrite('flipped.jpg', img)
    cv2.imwrite('mask.jpg', mask)
    return img, mask
if __name__ == '__main__':
    # Demo: fold random corners of '4_bf.jpg' using the corner points stored
    # in the annotation file '4_bf.json'.
    img = cv2.imread('4_bf.jpg')
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        raise FileNotFoundError("cannot read image '4_bf.jpg'")
    # Use a context manager so the annotation file handle is closed.
    with open('4_bf.json') as fp:
        js_data = json.load(fp)
    points = js_data['shapes'][0]['points']
    # NOTE(review): get_points is not defined in the visible part of this
    # file; it presumably orders the corner points — confirm it exists.
    points = get_points(points)
    mask = np.zeros((img.shape[0], img.shape[1]), dtype=np.uint8)
    # generate_flip returns a pair; unpack it instead of binding the tuple
    # to ``img``.
    img, mask = generate_flip(img, mask, points)
本文介绍了一种在文档图像上实现翻折效果的方法,通过坐标变换实现文档的局部翻折,以此来增加图像数据集的变化性和真实性。文章详细介绍了翻折效果的数学原理和具体的实现代码。
1万+

被折叠的 条评论
为什么被折叠?



