sse2

在网上找到一个 SSE 的示例程序:http://www.codeproject.com/Articles/4522/Introduction-to-SSE-Programming

对应的sse程序如下:

fResult[i] = sqrt( fSource1[i]*fSource1[i] + fSource2[i]*fSource2[i] ) + 0.5,  i = 0, 1, 2 ... ARRAY_SIZE-1

C++ function:

// Scalar reference version: for every index below nSize, stores
// sqrt(pArray1[k]^2 + pArray2[k]^2) + 0.5 into pResult[k].
void CSSETestDlg::ComputeArrayCPlusPlus(
          float* pArray1,                   // [in] first source array
          float* pArray2,                   // [in] second source array
          float* pResult,                   // [out] result array
          int nSize)                        // [in] size of all arrays
{
    for ( int k = 0; k < nSize; ++k )
    {
        // Read both inputs before writing, so an output buffer that aliases
        // an input behaves the same as element-by-element pointer walking.
        const float a = pArray1[k];
        const float b = pArray2[k];

        pResult[k] = static_cast<float>(sqrt(a * a + b * b)) + 0.5f;
    }
}

C++ function with SSE Intrinsics:

// SSE version: stores sqrt(pArray1[i]^2 + pArray2[i]^2) + 0.5 into pResult[i]
// for every i in [0, nSize), processing four floats per 128-bit register.
//
// - Unaligned loads/stores are used, so the arrays are not required to be
//   16-byte aligned (dereferencing a __m128* on an unaligned address faults).
// - The nSize % 4 elements left over after the packed loop are computed one
//   at a time with the scalar (_ss) forms of the same instructions, so no
//   element of pResult is left unwritten.
// - A non-positive nSize performs no work.
void CSSETestDlg::ComputeArrayCPlusPlusSSE(
          float* pArray1,                   // [in] first source array
          float* pArray2,                   // [in] second source array
          float* pResult,                   // [out] result array
          int nSize)                        // [in] size of all arrays
{
    const __m128 m0_5 = _mm_set_ps1(0.5f);  // m0_5[0, 1, 2, 3] = 0.5

    // Largest multiple of 4 not exceeding nSize (computed without i + 4,
    // which could overflow for nSize close to INT_MAX).
    const int nPacked = (nSize / 4) * 4;

    int i = 0;

    // Packed loop: 4 elements per iteration.
    for ( ; i < nPacked; i += 4 )
    {
        const __m128 a = _mm_loadu_ps(pArray1 + i);
        const __m128 b = _mm_loadu_ps(pArray2 + i);
        const __m128 sumSq = _mm_add_ps(_mm_mul_ps(a, a), _mm_mul_ps(b, b));

        _mm_storeu_ps(pResult + i, _mm_add_ps(_mm_sqrt_ps(sumSq), m0_5));
    }

    // Tail loop: remaining 0..3 elements, lowest lane only.
    for ( ; i < nSize; i++ )
    {
        const __m128 a = _mm_load_ss(pArray1 + i);
        const __m128 b = _mm_load_ss(pArray2 + i);
        const __m128 sumSq = _mm_add_ss(_mm_mul_ss(a, a), _mm_mul_ss(b, b));

        _mm_store_ss(pResult + i, _mm_add_ss(_mm_sqrt_ss(sumSq), m0_5));
    }
}

OpenCV 中有一段计算 AD(absolute difference,绝对差)的程序如下:

// Accumulates, for every x and every d, the absolute difference between
// prow1[x] and prow2[width-1-x + d] into cost[x*D + d].
// The outer loop runs cn*2 times; prow1 and prow2 each advance by `width`
// elements per pass.
for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
    {
        for( x = minX1; x < maxX1; x++ )
        {
            // Value from the first buffer; compared against one prow2 value per d.
            int u = prow1[x];
        #if CV_SSE2
            if( useSIMD )
            {
                // _u: u replicated into all 16 byte lanes; z: all-zero register
                // used below to widen 8-bit lanes to 16-bit.
                __m128i _u = _mm_set1_epi8(u), z = _mm_setzero_si128();
               
                // 16 values of d per iteration (one 128-bit register of bytes).
                // NOTE(review): the step of 16 means the last iteration touches
                // up to 15 entries past maxD unless (maxD - minD) is a multiple
                // of 16 -- confirm the buffers are sized/padded for that.
                for( int d = minD; d < maxD; d += 16 )
                {
                    // Unaligned load of 16 consecutive bytes from the second buffer.
                    __m128i _v = _mm_loadu_si128((const __m128i*)(prow2 + width-1-x + d));
                    // Per-byte |u - v|: each unsigned saturating subtraction clamps
                    // at 0, so one term is 0 and the other is the magnitude.
                    __m128i diff = _mm_adds_epu8(_mm_subs_epu8(_u,_v), _mm_subs_epu8(_v,_u));
                    // Aligned loads of the current costs for d..d+7 and d+8..d+15.
                    // NOTE(review): _mm_load_si128/_mm_store_si128 require 16-byte
                    // aligned addresses, and the +8 offset for the second register
                    // suggests cost elements are 16 bits wide -- confirm against
                    // the declarations of cost, CostType and D.
                    __m128i c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
                    __m128i c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));
                   
                    // Widen the low/high 8 differences to 16-bit by interleaving
                    // with zero, add with signed 16-bit saturation, store back.
                    _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_unpacklo_epi8(diff,z)));
                    _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_unpackhi_epi8(diff,z)));
                }
            }
            else
        #endif
            {
                // Scalar path: one d per iteration. The sum is narrowed with a
                // plain cast to CostType, not the saturating add used above.
                for( int d = minD; d < maxD; d++ )
                {
                    int v = prow2[width-1-x + d];
                    cost[x*D + d] = (CostType)(cost[x*D + d] + (CostType)std::abs(u - v));
                }
            }
        }
    }

可以看出,第一段程序每次循环处理 4 个 float(128 位寄存器 / 32 位),第二段程序每次循环 d 增加 16,即一次处理 16 个数据(128 位寄存器 / 8 位),因为图像数据是 8 位的。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值