Embedding the Origin Image into the Target Image: scanline point finding and inverse use of the homography matrix
These are just working notes; for the principles behind any of the algorithms, please look up the references yourself.
First, the expected result:
Origin_n Image
Target_n Image
Result Image
Q&A: why is the inverse process needed when the previous method already exists?
The previous article (image enhancement, coordinate transformation, hand-written homography estimation) did describe mapping origin image pixel coordinates to target image pixel coordinates. That is not wrong, but in the general case it produces an image with cracks. Extreme case: the origin is [3,3] and it has to fill a [100,100] region of the target. If the origin is used as the base coordinates, the mapped result still only amounts to [3,3] pixels, so the target image ends up with a large number of empty values; if the target is used as the base instead, each origin pixel is reused many times and severe distortion appears.
Which of the two approaches to use can be decided by comparing the size of the origin with the size of the region to be filled in the target.
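To make the trade-off concrete, here is a minimal sketch (not part of the article's code; it uses a plain scaling transform instead of a full homography, and the [3,3] / [100,100] numbers come from the extreme case above) that counts how many target pixels each mapping direction actually fills:

import numpy as np

# Extreme case from above: a [3,3] origin has to fill a [100,100] target region.
origin_h, origin_w = 3, 3
target_h, target_w = 100, 100
sx, sy = target_w / origin_w, target_h / origin_h

# Forward mapping: iterate origin pixels -> at most 3*3 = 9 target pixels ever get a value.
filled_forward = {(int(round(x * sx)), int(round(y * sy)))
                  for y in range(origin_h) for x in range(origin_w)}

# Inverse mapping: iterate target pixels and read back -> every target pixel gets a value,
# but the same few origin pixels are reused over and over ("severe distortion").
filled_inverse = {(u, v) for v in range(target_h) for u in range(target_w)}
sources = {(int(u / sx), int(v / sy)) for v in range(target_h) for u in range(target_w)}

print(len(filled_forward), "/", target_h * target_w)   # 9 / 10000 -> gaps everywhere
print(len(filled_inverse), "/", target_h * target_w)   # 10000 / 10000 -> fully covered
print(len(sources))                                     # only 9 distinct origin pixels are reused

The forward pass touches only 9 of the 10,000 target pixels, while the inverse pass covers all of them but keeps repeating the same 9 origin pixels.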
To be able to find the origin point for every target point that needs filling, we first need the pixel coordinates of all points inside the region selected on the target.
If the ray-casting algorithm (an algorithm that tests whether a point lies inside a polygon) were used for this, the time complexity would be O(mnk), where m*n is the image width times height and k is the number of polygon vertices; that is unacceptable.
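For comparison, here is a minimal sketch of that ray-casting baseline (the function name point_in_polygon is only illustrative and does not appear in the code below); every pixel is tested against all k edges, which is exactly where the O(mnk) cost comes from:

def point_in_polygon(px, py, poly):
    # Ray casting: shoot a horizontal ray to the right and count how many edges it crosses.
    inside = False
    k = len(poly)
    for i in range(k):
        x1, y1 = poly[i]
        x2, y2 = poly[(i + 1) % k]
        if (y1 > py) != (y2 > py):                        # edge straddles the ray's height
            x_cross = x1 + (py - y1) * (x2 - x1) / (y2 - y1)
            if px < x_cross:                              # crossing lies to the right of the point
                inside = not inside
    return inside

# O(m*n*k): every pixel of an m x n image is checked against all k polygon edges.
poly = [[2, 2], [5, 1], [5, 5], [11, 3], [11, 8], [2, 7]]
in_points = [[x, y] for y in range(12) for x in range(12) if point_in_polygon(x, y, poly)]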
Here a sorted edge table plus a scanline algorithm is used to find these points instead; in theory the running time is proportional to the area of the polygon. For the details of the algorithm, please look up the references yourself.
Building on the code from the previous article, the code below is provided.
code
scanline.py
import numpy as np


class ET_NODE:
    """One edge of the edge table, stored as a singly linked list per y_min bucket."""
    def __init__(self):
        self.x = 0        # x at the edge's lower endpoint (updated per scanline)
        self.d_k = 0      # 1/slope: change of x when y increases by 1
        self.y_max = 0    # y of the edge's upper endpoint (edge is dropped there)
        self.next = None


def create_et(points):
    """Build the edge table: a dict keyed by each edge's y_min, holding linked ET_NODE lists."""
    j = points.shape[0] - 1
    table = {}
    for i in range(points.shape[0]):
        begin, end = points[j], points[i]
        if begin[1] == end[1]:
            # skip horizontal edges (would divide by zero); the scanline fills them
            # through their neighbouring edges
            j = i
            continue
        node = ET_NODE()
        node.x = begin[0] if begin[1] < end[1] else end[0]
        node.y_max = max(begin[1], end[1])
        node.d_k = (begin[0] - end[0]) / (begin[1] - end[1])
        curr_y = min(begin[1], end[1])
        if curr_y not in table:
            table[curr_y] = node
        else:
            tmp = table[curr_y]
            while tmp.next:
                tmp = tmp.next
            tmp.next = node
        j = i
    return table
def print_et(et_table):
    for key in sorted(et_table.keys()):
        node = et_table[key]
        print("key:", key, "value:", node.x, node.d_k, node.y_max, end="==>")
        while node.next:
            node = node.next
            print("key:", key, "value:", node.x, node.d_k, node.y_max, end="==>")
        print()
def action_loop(et_table):
    """Scan each row from y_min to y_max and collect every integer point inside the polygon."""
    points = []
    y_min = min(et_table.keys())
    # y_max must be taken over every node, not only the first node of each bucket
    y_max = y_min
    for key in et_table:
        node = et_table[key]
        while node:
            y_max = max(y_max, node.y_max)
            node = node.next
    aet_table = []
    for y in range(y_min, y_max + 1):
        # remove edges that end on this scanline (their top vertex is recorded here)
        new_aet_table = []
        for edge in aet_table:
            if edge.y_max != y:
                new_aet_table.append(edge)
            else:
                points.append([round(edge.x), y])
        aet_table = new_aet_table
        # add edges that start on this scanline
        if y in et_table.keys():
            tmp = et_table[y]
            aet_table.append(tmp)
            while tmp.next:
                tmp = tmp.next
                aet_table.append(tmp)
        # keep the active edge table ordered by x so edges can be paired left/right
        aet_table.sort(key=lambda node: node.x)
        # add points: everything between each left/right pair of intersections is inside
        # (assumes an even number of active edges per scanline, true for simple polygons)
        for k in range(0, len(aet_table), 2):
            begin, end = aet_table[k], aet_table[k + 1]
            for p in range(round(begin.x), round(end.x) + 1):
                points.append([p, y])
        # update aet: move every intersection to the next scanline
        for edge in aet_table:
            edge.x += edge.d_k
    return points
if __name__ == "__main__":
    points = np.array([[2, 2], [5, 1], [5, 5], [11, 3], [11, 8], [2, 7]])
    et_table = create_et(points)
    print_et(et_table)
    in_points = action_loop(et_table)
    print(in_points)
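As a quick sanity check (a small sketch, not part of the original files; it relies on OpenCV's cv.pointPolygonTest, which returns a value >= 0 for points inside or on the polygon), the scanline output can be compared against a per-point test:

import numpy as np
import cv2 as cv
from scanline import create_et, action_loop

poly = np.array([[2, 2], [5, 1], [5, 5], [11, 3], [11, 8], [2, 7]])
scan_points = action_loop(create_et(poly))

contour = poly.reshape(-1, 1, 2).astype(np.float32)
# count how many scanline points OpenCV also considers inside or on the boundary;
# tiny disagreements can appear near vertices because action_loop rounds x to integers
outside = sum(1 for x, y in scan_points
              if cv.pointPolygonTest(contour, (float(x), float(y)), False) < 0)
print(len(scan_points), "points found,", outside, "flagged as outside by OpenCV")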
cali.py
import numpy as np
import cv2 as cv
from tqdm import tqdm
from scanline import create_et, action_loop


def load_data():
    """
    load the origin/target images and collect the matching corner points
    """
    # origin_img = cv.imread("./data/origin.png")
    # target_img = cv.imread("./data/target.png")
    origin_img = cv.imread("./data/origin_n.jpg")
    target_img = cv.imread("./data/target_n.jpg")
    # origin_img = cv.imread("./data/origin.jpg")
    # target_img = cv.imread("./data/target.jpg")
    # corners of the origin image: left-top, left-bottom, right-bottom, right-top
    origin = np.matrix(np.array([[0, 0],
                                 [0, origin_img.shape[0]],
                                 [origin_img.shape[1], origin_img.shape[0]],
                                 [origin_img.shape[1], 0]]))

    # mouse callback: click the region of the target image that should be replaced,
    # in the same order as the origin corners above, then press Esc to finish
    def mouse_action(event, x, y, flags, target):
        cv.imshow('collect coordinate', target_img)
        if event == cv.EVENT_LBUTTONUP:
            cv.circle(target_img, (x, y), 2, (0, 255, 255), -1)
            print(f'{x}, {y}')
            target.append([x, y])

    target = []
    cv.namedWindow('collect coordinate')
    # show the image right away so the window is not blank before the first mouse event
    cv.imshow('collect coordinate', target_img)
    cv.setMouseCallback('collect coordinate', mouse_action, target)
    while True:
        if cv.waitKey(20) == 27:
            break
    target = np.matrix(np.array(target))
    print(origin, target)
    return origin, target, origin_img, target_img
def init_model(origin, target):
    """
    return the A, B, H matrices of the linear system A @ h = B
    """
    if origin.shape[0] != target.shape[0]:
        raise Exception("origin must have the same number of points as target")
    A = np.zeros([2 * origin.shape[0], 8])
    B = np.zeros([2 * origin.shape[0], 1])
    H = np.random.uniform(0, 1, [8, 1])
    for i in range(origin.shape[0]):
        x, y = origin[i, 0], origin[i, 1]
        x_, y_ = target[i, 0], target[i, 1]
        # each point pair contributes two rows of the linear system
        A[i * 2] = np.array([x, y, 1, 0, 0, 0, -x * x_, -y * x_])
        A[i * 2 + 1] = np.array([0, 0, 0, x, y, 1, -x * y_, -y * y_])
        B[i * 2] = np.array([x_])
        B[i * 2 + 1] = np.array([y_])
    return np.matrix(A), np.matrix(B), np.matrix(H)
def train(A, B):
    """
    least-squares fit of the homography parameters (normal equations);
    any other convex optimizer could be substituted here
    """
    return (A.T * A).I * A.T * B


def fitness(A, B, H):
    """
    compute the fitness value (L1 residual of A @ H - B)
    """
    return np.sum(np.abs(A * H - B))
def predict(origin_img, target_img, H):
    """
    forward mapping: iterate origin pixels and write them into the target (may leave gaps)
    """
    for y in tqdm(range(origin_img.shape[0])):
        for x in range(origin_img.shape[1]):
            # origin position as a homogeneous column vector [3,1]
            pos_O = np.matrix(np.array([x, y, 1])).T
            # target position: [3,3] @ [3,1] ==> [3,1]
            pos_T = H @ pos_O
            pos_T = pos_T / pos_T.T.A[0, 2]
            # u, v pixel coordinates in the target image
            u, v = int(np.round(pos_T.T.A[0, 0])), int(np.round(pos_T.T.A[0, 1]))
            if 0 <= u < target_img.shape[1] and 0 <= v < target_img.shape[0]:
                target_img[v, u] = origin_img[y, x]
    return target_img
def predict_inv(origin_img, target_img, points, H):
    """
    inverse mapping: for every target pixel inside the selected polygon,
    look up the corresponding origin pixel through H^-1 (no gaps in the target)
    """
    for i in tqdm(range(points.shape[0])):
        x, y = points[i, 0], points[i, 1]
        pos_T = np.matrix(np.array([x, y, 1])).T
        pos_O = H.I @ pos_T
        pos_O = pos_O / pos_O.T.A[0, 2]
        u, v = int(np.round(pos_O.T.A[0, 0])), int(np.round(pos_O.T.A[0, 1]))
        if 0 <= u < origin_img.shape[1] and 0 <= v < origin_img.shape[0]:
            target_img[y, x] = origin_img[v, u]
    return target_img
if __name__ == "__main__":
    origin, target, o_img, t_img = load_data()
    A, B, H = init_model(origin, target)
    init_loss = fitness(A, B, H)
    print(A.shape, B.shape, H.shape)
    print(f"init loss value is :{init_loss}")
    new_H = train(A, B)
    trained_loss = fitness(A, B, new_H)
    print(f"trained loss value is :{trained_loss}")
    # append h33 = 1 and reshape the 8 parameters into the 3x3 homography matrix
    end_H = np.append(new_H, [[1]], axis=0).reshape(3, 3)
    # end_img = predict(o_img,t_img,end_H)
    # collect every pixel inside the clicked polygon, then fill them through H^-1
    et_table = create_et(np.array(target))
    in_points = np.array(action_loop(et_table))
    end_img = predict_inv(o_img, t_img, in_points, end_H)
    cv.imshow("end image", end_img)
    cv.waitKey(0)
    cv.imwrite('./data/result_n.png', end_img)
It must be mentioned that the scanline code above appends the top vertex of an edge that is about to be removed more than once. This is a flaw, but it does not affect the later steps: the same point maps to the same coordinates, so it is merely overwritten one extra time.
Compared with the code of the previous article, this article adds the predict_inv interface, which performs the computation in the inverse direction.
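Written out in homogeneous coordinates (this is the standard homography relation and matches the normalization step in the code), predict applies the forward mapping and predict_inv the inverse one:

$$
\begin{bmatrix} u \\ v \\ w \end{bmatrix} = H \begin{bmatrix} x \\ y \\ 1 \end{bmatrix},
\qquad
\begin{bmatrix} x' \\ y' \\ w' \end{bmatrix} = H^{-1} \begin{bmatrix} u \\ v \\ 1 \end{bmatrix},
$$

with the final pixel coordinates obtained as $(u/w,\, v/w)$ in the target and $(x'/w',\, y'/w')$ in the origin.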
Because the origin size here is larger than the area required in the target, no distortion occurs either. It is also worth constructing the opposite situation (origin size smaller than the required target area) and comparing the two approaches on both cases.
Finally, when gaps do appear, bilinear interpolation can be used to fill in the missing pixels; that optimization will be covered in a follow-up.
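As a preview, here is a minimal sketch of what that bilinear step could look like (an assumption about the follow-up, not the final implementation): instead of rounding pos_O to the nearest pixel in predict_inv, sample the four neighbouring origin pixels and blend them by their distances.

import numpy as np

def bilinear_sample(img, x, y):
    # Assumes (x, y) has already passed the bounds check done in predict_inv.
    x0, y0 = max(int(np.floor(x)), 0), max(int(np.floor(y)), 0)
    x1, y1 = min(x0 + 1, img.shape[1] - 1), min(y0 + 1, img.shape[0] - 1)
    fx, fy = x - x0, y - y0
    # Blend the two top neighbours, the two bottom neighbours, then blend vertically.
    top = (1 - fx) * img[y0, x0] + fx * img[y0, x1]
    bottom = (1 - fx) * img[y1, x0] + fx * img[y1, x1]
    return np.clip((1 - fy) * top + fy * bottom, 0, 255).astype(img.dtype)

# In predict_inv, the two rounding lines
#     u, v = int(np.round(pos_O.T.A[0, 0])), int(np.round(pos_O.T.A[0, 1]))
#     target_img[y, x] = origin_img[v, u]
# would be replaced by
#     target_img[y, x] = bilinear_sample(origin_img, pos_O.T.A[0, 0], pos_O.T.A[0, 1])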