(转)任意角度的高质量的快速的图…

本文介绍了一种高效的图像旋转算法,通过优化内部循环判断和利用像素绘制原理寻找边界来提高旋转速度。采用TRotaryClipData类确定旋转边界,并通过PicRotary3函数实现图像旋转。进一步使用SSE指令优化内存写操作,显著提升处理速度。
E:优化内部循环的判断函数PixelsIsInPic,将判断展开到内部循环之外,跳过不需要处理的目标像素;
    TRotaryClipData类用于寻找旋转需要处理的边界范围;算法思路是首先寻找原图片中心点对应的;
那条扫描线,然后依次向上和向下寻找边界;  如果想要更快速的边界寻找算法,可以考虑利用像素的直线
  绘制原理来自动生成边界(有机会的时候再来实现它);

                   (转)任意角度的高质量的快速的图像旋转(中)

                       边界寻找算法图示

struct TRotaryClipData{
public:
    long src_width;
    long src_height;
    long dst_width;
    long dst_height;
    long Ax_16;
    long Ay_16;
    long Bx_16;
    long By_16;
    long Cx_16;
    long Cy_16;
public:
    long out_dst_up_y;
    long out_dst_down_y;

    long out_src_x0_16;
    long out_src_y0_16;
private:
    long out_dst_up_x0;
    long out_dst_up_x1;
    long out_dst_down_x0;
    long out_dst_down_x1;
public:
    inline long get_up_x0(){ if (out_dst_up_x0<0) return 0;  else return out_dst_up_x0; }
    inline long get_up_x1(){ if (out_dst_up_x1>=dst_width) return dst_width;  else return out_dst_up_x1; }
    inline long get_down_x0(){ if (out_dst_down_x0<0) return 0;  else return out_dst_down_x0; }
    inline long get_down_x1(){ if (out_dst_down_x1>=dst_width) return dst_width;  else return out_dst_down_x1; }

    inline bool is_in_src(long src_x_16,long src_y_16)
    {
         return ( ( (src_x_16>=0)&&((src_x_16>>16)<src_width) )
               && ( (src_y_16>=0)&&((src_y_16>>16)<src_height) ) );
    }
    void find_begin_in(long dst_y,long& out_dst_x,long& src_x_16,long& src_y_16)
    {
        src_x_16-=Ax_16;
        src_y_16-=Ay_16;
        while (is_in_src(src_x_16,src_y_16))
        {
            --out_dst_x;
            src_x_16-=Ax_16;
            src_y_16-=Ay_16;
        }
        src_x_16+=Ax_16;
        src_y_16+=Ay_16;
    }
    bool find_begin(long dst_y,long& out_dst_x0,long dst_x1)
    {
        long test_dst_x0=out_dst_x0-1;
        long src_x_16=Ax_16*test_dst_x0 + Bx_16*dst_y + Cx_16;
        long src_y_16=Ay_16*test_dst_x0 + By_16*dst_y + Cy_16;
        for (long i=test_dst_x0;i<=dst_x1;++i)
        {
            if (is_in_src(src_x_16,src_y_16))
            {
                out_dst_x0=i;
                if (i==test_dst_x0)
                    find_begin_in(dst_y,out_dst_x0,src_x_16,src_y_16);
                if (out_dst_x0<0)
                {
                    src_x_16-=(Ax_16*out_dst_x0);
                    src_y_16-=(Ay_16*out_dst_x0);
                }
                out_src_x0_16=src_x_16;
                out_src_y0_16=src_y_16;
                return true;
            }
            else
            {
                src_x_16+=Ax_16;
                src_y_16+=Ay_16;
            }
        }
        return false;
    }
    void find_end(long dst_y,long dst_x0,long& out_dst_x1)
    {
        long test_dst_x1=out_dst_x1;
        if (test_dst_x1<dst_x0) test_dst_x1=dst_x0;

        long src_x_16=Ax_16*test_dst_x1 + Bx_16*dst_y + Cx_16;
        long src_y_16=Ay_16*test_dst_x1 + By_16*dst_y + Cy_16;
        if (is_in_src(src_x_16,src_y_16))
        {
            ++test_dst_x1;
            src_x_16+=Ax_16;
            src_y_16+=Ay_16;
            while (is_in_src(src_x_16,src_y_16))
            {
                ++test_dst_x1;
                src_x_16+=Ax_16;
                src_y_16+=Ay_16;
            }
            out_dst_x1=test_dst_x1;
        }
        else
        {
            src_x_16-=Ax_16;
            src_y_16-=Ay_16;
            while (!is_in_src(src_x_16,src_y_16))
            {
                --test_dst_x1;
                src_x_16-=Ax_16;
                src_y_16-=Ay_16;
            }
            out_dst_x1=test_dst_x1;
        }
    }

    bool inti_clip(double move_x,double move_y)
    {
        //计算src中心点映射到dst后的坐标
        out_dst_down_y=(long)(src_height*0.5+move_y);
        out_dst_down_x0=(long)(src_width*0.5+move_x);
        out_dst_down_x1=out_dst_down_x0;
        //得到初始out_???
        if (find_begin(out_dst_down_y,out_dst_down_x0,out_dst_down_x1))
            find_end(out_dst_down_y,out_dst_down_x0,out_dst_down_x1);
        out_dst_up_y=out_dst_down_y;
        out_dst_up_x0=out_dst_down_x0;
        out_dst_up_x1=out_dst_down_x1;
        return (out_dst_down_x0<out_dst_down_x1);
    }
    bool next_clip_line_down()
    {
        ++out_dst_down_y;
        if (!find_begin(out_dst_down_y,out_dst_down_x0,out_dst_down_x1)) return false;
        find_end(out_dst_down_y,out_dst_down_x0,out_dst_down_x1);
        return (out_dst_down_x0<out_dst_down_x1);
    }
    bool next_clip_line_up()
    {
        --out_dst_up_y;
        if (!find_begin(out_dst_up_y,out_dst_up_x0,out_dst_up_x1)) return false;
        find_end(out_dst_up_y,out_dst_up_x0,out_dst_up_x1);
        return (out_dst_up_x0<out_dst_up_x1);
    }
};void PicRotary3_CopyLine(TARGB32* pDstLine,long dstCount,long Ax_16,long Ay_16,
                        long srcx0_16,long srcy0_16,TARGB32* pSrcLine,long src_byte_width)
{
    for (long x=0;x<dstCount;++x)
    {
        pDstLine[x]=Pixels(pSrcLine,src_byte_width,srcx0_16>>16,srcy0_16>>16);
        srcx0_16+=Ax_16;
        srcy0_16+=Ay_16;
    }
}

void PicRotary3(const TPicRegion& Dst,const TPicRegion& Src,double RotaryAngle,double ZoomX,double ZoomY,double move_x,double move_y)
{
    if ( (fabs(ZoomX*Src.width)<1.0e-4) || (fabs(ZoomY*Src.height)<1.0e-4) ) return; //太小的缩放比例认为已经不可见
    double tmprZoomXY=1.0/(ZoomX*ZoomY); 
    double rZoomX=tmprZoomXY*ZoomY;
    double rZoomY=tmprZoomXY*ZoomX;
    double sinA,cosA;
    SinCos(RotaryAngle,sinA,cosA);
    long Ax_16=(long)(rZoomX*cosA*(1<<16));
    long Ay_16=(long)(rZoomX*sinA*(1<<16));
    long Bx_16=(long)(-rZoomY*sinA*(1<<16));
    long By_16=(long)(rZoomY*cosA*(1<<16));
    double rx0=Src.width*0.5;  //(rx0,ry0)为旋转中心
    double ry0=Src.height*0.5;
    long Cx_16=(long)((-(rx0+move_x)*rZoomX*cosA+(ry0+move_y)*rZoomY*sinA+rx0)*(1<<16));
    long Cy_16=(long)((-(rx0+move_x)*rZoomX*sinA-(ry0+move_y)*rZoomY*cosA+ry0)*(1<<16));

    TRotaryClipData rcData;
    rcData.Ax_16=Ax_16;
    rcData.Bx_16=Bx_16;
    rcData.Cx_16=Cx_16;
    rcData.Ay_16=Ay_16;
    rcData.By_16=By_16;
    rcData.Cy_16=Cy_16;
    rcData.dst_width=Dst.width;
    rcData.dst_height=Dst.height;
    rcData.src_width=Src.width;
    rcData.src_height=Src.height;
    if (!rcData.inti_clip(move_x,move_y)) return;

    TARGB32* pDstLine=Dst.pdata;
    ((TUInt8*&)pDstLine)+=(Dst.byte_width*rcData.out_dst_down_y);
    while (true) //to down
    {
        long y=rcData.out_dst_down_y;
        if (y>=Dst.height) break;
        if (y>=0)
        {
            long x0=rcData.get_down_x0();
            PicRotary3_CopyLine(&pDstLine[x0],rcData.get_down_x1()-x0,Ax_16,Ay_16,
                rcData.out_src_x0_16,rcData.out_src_y0_16,Src.pdata,Src.byte_width);
        }
        if (!rcData.next_clip_line_down()) break;
        ((TUInt8*&)pDstLine)+=Dst.byte_width;
    }
  
    pDstLine=Dst.pdata;
    ((TUInt8*&)pDstLine)+=(Dst.byte_width*rcData.out_dst_up_y);
    while (rcData.next_clip_line_up()) //to up
    {
        long y=rcData.out_dst_up_y;
        if (y<0) break;
        ((TUInt8*&)pDstLine)-=Dst.byte_width;
        if (y<Dst.height)
        {
            long x0=rcData.get_up_x0();
            PicRotary3_CopyLine(&pDstLine[x0],rcData.get_up_x1()-x0,Ax_16,Ay_16,
                rcData.out_src_x0_16,rcData.out_src_y0_16,Src.pdata,Src.byte_width);
        }
    }
}
////////////////////////////////////////////////////////////////////////////////
//速度测试:
//==============================================================================
// PicRotary3             280.9 fps
//////////////////////////////////////////////////////////////////////////////// 


F:使用SSE的MOVNTQ指令优化CPU写缓冲

 (仅改写了PicRotary3_CopyLine的实现)


void __declspec(naked) __stdcall PicRotarySSE_CopyLine(TARGB32* pDstLine,long dstCount,long Ax_16,long Ay_16,
                        //                                      [esp+ 4]      [esp+ 8]      [esp+12]   [esp+16]
                        long srcx0_16,long srcy0_16,TARGB32* pSrcLine,long src_byte_width)
                        //   [esp+20]      [esp+24]          [esp+28]      [esp+32]
{
    //利用SSE的MOVNTQ指令优化写缓冲的汇编实现
    asm
    {
        push        ebx 
        push        esi 
        push        edi  
        push        ebp 
        //esp offset 16
        mov         ebx,dword ptr [esp+ 8+16]
        mov         esi,dword ptr [esp+32+16]
        mov         edi,dword ptr [esp+28+16]
        mov         eax,dword ptr [esp+24+16]
        mov         ecx,dword ptr [esp+20+16]
        dec         ebx
        xor         edx,edx
        test        ebx,ebx
        mov         dword ptr [esp+ 8+16],ebx
        jle         loop_bound

        //jmp   loop_begin
        //align 16
    loop_begin:
            mov         ebx,eax
            add         eax,dword ptr [esp+16+16]
            sar         ebx,16
            imul        ebx,esi
            add         ebx,edi
            mov         ebp,ecx
            add         ecx,dword ptr [esp+12+16]
            sar         ebp,16
            MOVD        MM0,dword ptr [ebx+ebp*4]
            mov         ebx,eax
            add         eax,dword ptr [esp+16+16]
            sar         ebx,16
            imul        ebx,esi
            mov         ebp,ecx
            add         ecx,dword ptr [esp+12+16]
            sar         ebp,16
            add         ebx,edi
            MOVD        MM1,dword ptr [ebx+ebp*4]
            mov         ebp,dword ptr [esp+ 4+16]
            PUNPCKlDQ   MM0,MM1
            mov         ebx,dword ptr [esp+ 8+16]
            MOVNTQ      qword ptr [ebp+edx*4],MM0
            add         edx,2
            cmp         edx,ebx 
            jl          loop_begin

            EMMS

    loop_bound:
        cmp         edx,ebx
        jne         loop_bound_end
        sar         eax,16
        imul        eax,esi
        sar         ecx,16
        add         eax,edi
        mov         eax,dword ptr [eax+ecx*4]
        mov         ecx,dword ptr [esp+ 4+16]
        mov         dword ptr [ecx+edx*4],eax
    loop_bound_end:
        pop         ebp 
        pop         edi 
        pop         esi 
        pop         ebx 
        ret         32 
    }
}

void PicRotarySSE(const TPicRegion& Dst,const TPicRegion& Src,double RotaryAngle,double ZoomX,double ZoomY,double move_x,double move_y)
{
    if ( (fabs(ZoomX*Src.width)<1.0e-4) || (fabs(ZoomY*Src.height)<1.0e-4) ) return; //太小的缩放比例认为已经不可见
    double tmprZoomXY=1.0/(ZoomX*ZoomY); 
    double rZoomX=tmprZoomXY*ZoomY;
    double rZoomY=tmprZoomXY*ZoomX;
    double sinA,cosA;
    SinCos(RotaryAngle,sinA,cosA);
    long Ax_16=(long)(rZoomX*cosA*(1<<16));
    long Ay_16=(long)(rZoomX*sinA*(1<<16));
    long Bx_16=(long)(-rZoomY*sinA*(1<<16));
    long By_16=(long)(rZoomY*cosA*(1<<16));
    double rx0=Src.width*0.5;  //(rx0,ry0)为旋转中心
    double ry0=Src.height*0.5;
    long Cx_16=(long)((-(rx0+move_x)*rZoomX*cosA+(ry0+move_y)*rZoomY*sinA+rx0)*(1<<16));
    long Cy_16=(long)((-(rx0+move_x)*rZoomX*sinA-(ry0+move_y)*rZoomY*cosA+ry0)*(1<<16));

    TRotaryClipData rcData;
    rcData.Ax_16=Ax_16;
    rcData.Bx_16=Bx_16;
    rcData.Cx_16=Cx_16;
    rcData.Ay_16=Ay_16;
    rcData.By_16=By_16;
    rcData.Cy_16=Cy_16;
    rcData.dst_width=Dst.width;
    rcData.dst_height=Dst.height;
    rcData.src_width=Src.width;
    rcData.src_height=Src.height;
    if (!rcData.inti_clip(move_x,move_y)) return;

    TARGB32* pDstLine=Dst.pdata;
    ((TUInt8*&)pDstLine)+=(Dst.byte_width*rcData.out_dst_down_y);
    while (true) //to down
    {
        long y=rcData.out_dst_down_y;
        if (y>=Dst.height) break;
        if (y>=0)
        {
            long x0=rcData.get_down_x0();
            PicRotarySSE_CopyLine(&pDstLine[x0],rcData.get_down_x1()-x0,Ax_16,Ay_16,
                rcData.out_src_x0_16,rcData.out_src_y0_16,Src.pdata,Src.byte_width);
        }
        if (!rcData.next_clip_line_down()) break;
        ((TUInt8*&)pDstLine)+=Dst.byte_width;
    }
  
    pDstLine=Dst.pdata;
    ((TUInt8*&)pDstLine)+=(Dst.byte_width*rcData.out_dst_up_y);
    while (rcData.next_clip_line_up()) //to up
    {
        long y=rcData.out_dst_up_y;
        if (y<0) break;
        ((TUInt8*&)pDstLine)-=Dst.byte_width;
        if (y<Dst.height)
        {
            long x0=rcData.get_up_x0();
            PicRotarySSE_CopyLine(&pDstLine[x0],rcData.get_up_x1()-x0,Ax_16,Ay_16,
                rcData.out_src_x0_16,rcData.out_src_y0_16,Src.pdata,Src.byte_width);
        }
    }

    asm  sfence //刷新写入
}
////////////////////////////////////////////////////////////////////////////////
//速度测试:
//==============================================================================
// PicRotarySEE           306.3 fps
////////////////////////////////////////////////////////////////////////////////

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值