逆向透视变换的带透明通道的贴图数据增强

最新推荐文章于 2025-04-22 22:45:06 发布

Alphapeople

最新推荐文章于 2025-04-22 22:45:06 发布

阅读量163

点赞数

CC 4.0 BY-SA版权

分类专栏：计算机视觉

本文链接：https://blog.youkuaiyun.com/weixin_38241876/article/details/117280572

计算机视觉专栏收录该内容

107 篇文章

订阅专栏

该代码实现了一系列图像处理操作，包括旋转、透视变换和图像粘贴。首先，定义了角度转换函数`rad`（角度转弧度），然后是`order_points`函数，用于将四个点按左上、右上、右下、左下的顺序排序，以便进行透视变换。`four_point_transform`函数根据这四个点计算透视变换矩阵并取其逆矩阵，同时返回目标矩形的宽高；`split`函数用于按固定长度切分列表，`rota`函数执行图像旋转。最后，`getpaste`函数将贴图缩放后用逆透视矩阵变换到背景图尺寸的画布上，主循环再以其透明通道为掩码把它粘贴到背景图上。整个过程用于处理和变换图像，例如将小图像精确地粘贴到大图像的特定位置。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

import numpy as np
import cv2
from math import *
from PIL import Image
import random

def rad(x):
    """Convert an angle given in degrees to radians."""
    scaled = x * np.pi
    return scaled / 180

def order_points(pts):
    """Order four 2-D points as top-left, top-right, bottom-right, bottom-left.

    The corners are picked from per-point coordinate sums and differences:
    the smallest ``x + y`` is taken as top-left and the largest as
    bottom-right, while the smallest ``y - x`` is taken as top-right and the
    largest as bottom-left.

    Args:
        pts: NumPy array with one ``(x, y)`` point per row.

    Returns:
        A ``(4, 2)`` float32 array holding the points in the order above.
    """
    coord_sum = pts.sum(axis=1)
    # np.diff along axis 1 yields y - x for every row.
    coord_diff = np.diff(pts, axis=1)

    corner_rows = [
        np.argmin(coord_sum),   # top-left
        np.argmin(coord_diff),  # top-right
        np.argmax(coord_sum),   # bottom-right
        np.argmax(coord_diff),  # bottom-left
    ]

    ordered = np.zeros((4, 2), dtype="float32")
    ordered[:] = pts[corner_rows]
    return ordered


def four_point_transform(image, pts):
    """Compute the inverse perspective matrix for a quadrilateral.

    The four points are ordered with ``order_points`` and an axis-aligned
    destination rectangle is sized from the longer of each pair of opposite
    edges. The matrix mapping the quadrilateral onto that rectangle is then
    inverted, so the returned matrix maps the rectangle back onto the
    quadrilateral.

    Args:
        image: Not used by this function; kept for call compatibility.
        pts: Array of four ``(x, y)`` points describing the quadrilateral.

    Returns:
        A tuple ``(M, (maxWidth, maxHeight))`` with the inverted 3x3
        perspective matrix and the integer size of the rectangle.
    """
    def edge_length(p, q):
        # Euclidean distance between two corner points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    rect = order_points(pts)
    tl, tr, br, bl = rect

    # Rectangle width: the longer of the bottom and top edges.
    maxWidth = max(int(edge_length(br, bl)), int(edge_length(tr, tl)))
    # Rectangle height: the longer of the right and left edges.
    maxHeight = max(int(edge_length(tr, br)), int(edge_length(tl, bl)))

    # Destination corners in the same top-left, top-right, bottom-right,
    # bottom-left order as ``rect``.
    right = maxWidth - 1
    bottom = maxHeight - 1
    dst = np.array(
        [[0, 0], [right, 0], [right, bottom], [0, bottom]],
        dtype="float32")

    # Forward matrix takes quadrilateral -> rectangle; invert it to obtain
    # the rectangle -> quadrilateral mapping that is returned.
    forward = cv2.getPerspectiveTransform(rect, dst)
    M = np.linalg.inv(forward)
    return M, (maxWidth, maxHeight)

def split(L, n):
    """Cut ``L`` into consecutive slices of length ``n``.

    The last slice is shorter when ``len(L)`` is not a multiple of ``n``.

    Args:
        L: Sliceable sequence to cut up.
        n: Slice length, used as the step of ``range``.

    Returns:
        A list of the slices, in order.
    """
    return [L[start:start + n] for start in range(0, len(L), n)]

def rota(img, degree):
    """Rotate an image about its centre on a canvas enlarged to fit it.

    Args:
        img: Image array; only its first two dimensions (height, width)
            are read here.
        degree: Rotation angle in degrees, passed to
            ``cv2.getRotationMatrix2D``.

    Returns:
        The rotated image from ``cv2.warpAffine``, with
        ``borderValue=(255, 255, 255)`` used for uncovered areas.
    """
    height, width = img.shape[:2]

    theta = radians(degree)
    abs_sin = fabs(sin(theta))
    abs_cos = fabs(cos(theta))

    # Size after rotation.
    heightNew = int(width * abs_sin + height * abs_cos)
    widthNew = int(height * abs_sin + width * abs_cos)

    matRotation = cv2.getRotationMatrix2D((width / 2, height / 2), degree, 1)
    # Shift the result so it is centred on the enlarged canvas.
    matRotation[0, 2] += (widthNew - width) / 2
    matRotation[1, 2] += (heightNew - height) / 2

    return cv2.warpAffine(
        img, matRotation, (widthNew, heightNew), borderValue=(255, 255, 255))

def getpaste(pasteimgpath, warpR, size, backsize):
    """Load a patch image and warp it onto a canvas of ``backsize``.

    The patch is rotated by 90 degrees when its orientation (landscape or
    portrait) disagrees with that of ``size``, resized to ``size``,
    converted to RGBA and finally warped with ``warpR``.

    Args:
        pasteimgpath: Path of the patch image, opened with PIL.
        warpR: 3x3 perspective matrix handed to ``cv2.warpPerspective``.
        size: ``(width, height)`` the patch is resized to before warping.
        backsize: ``(width, height)`` of the output canvas.

    Returns:
        A PIL image built from the warped RGBA array.
    """
    patch = np.array(Image.open(pasteimgpath))
    h, w, _ = patch.shape

    # The two cases are mutually exclusive, so at most one rotation happens.
    portrait_target = size[0] < size[1] and w > h
    landscape_target = size[0] > size[1] and w < h
    if portrait_target or landscape_target:
        patch = rota(patch, 90)

    patch = cv2.resize(patch, size)
    rgba = np.array(Image.fromarray(patch).convert('RGBA'))

    warped = cv2.warpPerspective(rgba, warpR, backsize)
    return Image.fromarray(warped)


cc = 0
# For every background image, read its quadrilateral annotations (eight
# integers per quadrilateral: four x/y pairs), warp a randomly chosen RGBA
# patch into each quadrilateral, composite it onto the background and show
# the result. Failures are reported and skipped rather than silently
# swallowed, so the run stays best-effort but remains debuggable.
for i in range(35):
    image = cv2.imread('/home/lhq/xg/{}.jpg'.format(i))
    if image is None:
        # cv2.imread returns None instead of raising when it cannot load.
        print('skip {}: background image could not be read'.format(i))
        continue

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    H, W, _ = image.shape
    backsize = (W, H)

    try:
        with open('/home/lhq/xg/{}.txt'.format(i)) as f:
            # split() without an argument also accepts newlines and
            # repeated spaces between the numbers.
            datas = split(f.read().split(), 8)
    except (OSError, ValueError) as err:
        print('skip {}: annotation file could not be read ({})'.format(i, err))
        continue

    for data in datas:
        if len(data) != 8:
            # A trailing partial group cannot describe four corners.
            print('skip quad in {}: expected 8 values, got {}'.format(i, len(data)))
            continue
        try:
            corners = split([int(x) for x in data], 2)
            pts = np.array(corners, dtype="float32")

            warpR, size = four_point_transform(image, pts)
            transimg = getpaste(
                '/home/lhq/mb3/{}.png'.format(random.randint(0, 31)),
                warpR, size, backsize)

            # Composite on a separate PIL copy so that ``image`` stays a
            # NumPy array if anything in this step fails.
            canvas = Image.fromarray(image)
            # The last band of the RGBA patch is its alpha channel.
            canvas.paste(transimg, (0, 0), mask=transimg.split()[-1])
        except (OSError, ValueError, cv2.error) as err:
            print('skip quad in {}: {}'.format(i, err))
            continue
        image = np.array(canvas)

    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    cv2.imshow('', image)
    cv2.waitKey(0)