与变换量化有关的其他知识
变换
哈达玛变换
哈达玛变换是广义傅立叶变换的一种,它的变换矩阵Hm是一个2^m x 2^m的矩阵。
哈达玛变换及其矩阵有下面的几个特性:1、hadamard矩阵元素都是正负1,且其特征值也只包含正负1
2、hadamard矩阵为正交、对称矩阵,相应的hadamard变换是正交变换
3、hadamard矩阵奇数行(列)偶对称,偶数行(列)奇对称
4、hadamard变换满足变换前后能量守恒
使用哈达玛变换的地方
哈达玛变换在HEVC(HM)中的主要运用,是在帧内预测的模式选择阶段计算失真:先对原始块与预测块的残差做哈达玛变换,再对变换系数的绝对值求和(严格来说这是SATD,代码中的变量名仍沿用SAD)。下面两个函数都通过calcHAD使用了哈达玛变换
// Excerpt of the luma intra search. In fast-search mode every candidate mode is
// scored as: Hadamard-based distortion (calcHAD) + mode bits * sqrt(lambda),
// and the score is fed to xUpdateCandList. Elided parts are marked "(omitted)".
Void TEncSearch::estIntraPredQT( TComDataCU* pcCU,
TComYuv* pcOrgYuv,
TComYuv* pcPredYuv,
TComYuv* pcResiYuv,
TComYuv* pcRecoYuv,
UInt& ruiDistC,
Bool bLumaOnly )
{
// ... (omitted)
// Loop over every prediction unit (PU)
for( UInt uiPU = 0; uiPU < uiNumPU; uiPU++, uiPartOffset += uiQNumParts )
{
// ... (omitted)
// Fast search mode
if (doFastSearch)
{
// ... (omitted)
for( Int modeIdx = 0; modeIdx < numModesAvailable; modeIdx++ ) // 35 modes in total, numModesAvailable = 35
{
UInt uiMode = modeIdx;
// Predict the luma block with this mode
predIntraLumaAng( pcCU->getPattern(), uiMode, piPred, uiStride, uiWidth, uiHeight, bAboveAvail, bLeftAvail );
// use hadamard transform here
// Distortion between the original and the prediction, measured through the Hadamard transform
UInt uiSad = m_pcRdCost->calcHAD(g_bitDepthY, piOrg, uiStride, piPred, uiStride, uiWidth, uiHeight );
UInt iModeBits = xModeBitsIntra( pcCU, uiMode, uiPU, uiPartOffset, uiDepth, uiInitTrDepth );
// Cost of this mode: distortion + mode bits * sqrt(lambda)
Double cost = (Double)uiSad + (Double)iModeBits * m_pcRdCost->getSqrtLambda();
// Update the candidate list
CandNum += xUpdateCandList( uiMode, cost, numModesForFullRD, uiRdModeList, CandCostList );
}
// ... (omitted)
}
else
{
// ... (omitted)
}
// ... (omitted)
} // PU loop
// ... (omitted)
}
// Excerpt of the chroma intra mode pre-estimation. For each mode in
// [uiMinMode, uiMaxMode) the U and V planes are predicted, their calcHAD
// distortions are added, and the mode with the smallest sum is stored on the CU.
Void TEncSearch::preestChromaPredMode( TComDataCU* pcCU,
TComYuv* pcOrgYuv,
TComYuv* pcPredYuv )
{
// ... (omitted)
for( UInt uiMode = uiMinMode; uiMode < uiMaxMode; uiMode++ )
{
//--- get prediction ---
// Intra prediction of both chroma components
predIntraChromaAng( pPatChromaU, uiMode, piPredU, uiStride, uiWidth, uiHeight, bAboveAvail, bLeftAvail );
predIntraChromaAng( pPatChromaV, uiMode, piPredV, uiStride, uiWidth, uiHeight, bAboveAvail, bLeftAvail );
//--- get SAD ---
// Hadamard-based distortion, U plane plus V plane
UInt uiSAD = m_pcRdCost->calcHAD(g_bitDepthC, piOrgU, uiStride, piPredU, uiStride, uiWidth, uiHeight );
uiSAD += m_pcRdCost->calcHAD(g_bitDepthC, piOrgV, uiStride, piPredV, uiStride, uiWidth, uiHeight );
//--- check ---
// Keep the mode with the lowest distortion seen so far
if( uiSAD < uiMinSAD )
{
uiMinSAD = uiSAD;
uiBestMode = uiMode;
}
}
//===== set chroma pred mode =====
// Store the selected prediction mode on the CU
pcCU->setChromIntraDirSubParts( uiBestMode, 0, pcCU->getDepth( 0 ) );
}
使用哈达玛变换计算SAD
// Hadamard-based distortion between two blocks of iWidth x iHeight samples.
// The block is tiled with 8x8 sub-blocks when both dimensions are multiples
// of 8, otherwise with 4x4 sub-blocks (both dimensions must then be multiples
// of 4). The per-tile results are summed and the sum is scaled down by
// DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8).
UInt TComRdCost::calcHAD(Int bitDepth, Pel* pi0, Int iStride0, Pel* pi1, Int iStride1, Int iWidth, Int iHeight )
{
  const bool tile8x8 = ( (iWidth % 8) == 0 ) && ( (iHeight % 8) == 0 );
  if ( !tile8x8 )
  {
    // Only the 4x4 tiling is left as a fallback
    assert(iWidth % 4 == 0 && iHeight % 4 == 0);
  }
  const Int tile = tile8x8 ? 8 : 4;

  UInt uiTotal = 0;
  for ( Int row = 0; row < iHeight; row += tile )
  {
    for ( Int col = 0; col < iWidth; col += tile )
    {
      if ( tile8x8 )
      {
        uiTotal += xCalcHADs8x8( pi0 + col, pi1 + col, iStride0, iStride1, 1 );
      }
      else
      {
        uiTotal += xCalcHADs4x4( pi0 + col, pi1 + col, iStride0, iStride1, 1 );
      }
    }
    // Step both inputs down to the next row of tiles
    pi0 += iStride0*tile;
    pi1 += iStride1*tile;
  }

  return uiTotal >> DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8);
}
哈达玛变换函数
// 8x8的hadamard变换
UInt TComRdCost::xCalcHADs8x8( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep )
{
Int k, i, j, jj, sad=0;
Int diff[64], m1[8][8], m2[8][8], m3[8][8];
assert( iStep == 1 );
for( k = 0; k < 64; k += 8 )
{
diff[k+0] = piOrg[0] - piCur[0];
diff[k+1] = piOrg[1] - piCur[1];
diff[k+2] = piOrg[2] - piCur[2];
diff[k+3] = piOrg[3] - piCur[3];
diff[k+4] = piOrg[4] - piCur[4];
diff[k+5] = piOrg[5] - piCur[5];
diff[k+6] = piOrg[6] - piCur[6];
diff[k+7] = piOrg[7] - piCur[7];
piCur += iStrideCur;
piOrg += iStrideOrg;
}
//horizontal
for (j=0; j < 8; j++)
{
jj = j << 3;
m2[j][0] = diff[jj ] + diff[jj+4];
m2[j][1] = diff[jj+1] + diff[jj+5];
m2[j][2] = diff[jj+2] + diff[jj+6];
m2[j][3] = diff[jj+3] + diff[jj+7];
m2[j][4] = diff[jj ] - diff[jj+4];
m2[j][5] = diff[jj+1] - diff[jj+5];
m2[j][6] = diff[jj+2] - diff[jj+6];
m2[j][7] = diff[jj+3] - diff[jj+7];
m1[j][0] = m2[j][0] + m2[j][2];
m1[j][1] = m2[j][1] + m2[j][3];
m1[j][2] = m2[j][0] - m2[j][2];
m1[j][3] = m2[j][1] - m2[j][3];
m1[j][4] = m2[j][4] + m2[j][6];
m1[j][5] = m2[j][5] + m2[j][7];
m1[j][6] = m2[j][4] - m2[j][6];
m1[j][7] = m2[j][5] - m2[j][7];
m2[j][0] = m1[j][0] + m1[j][1];
m2[j][1] = m1[j][0] - m1[j][1];
m2[j][2] = m1[j][2] + m1[j][3];
m2[j][3] = m1[j][2] - m1[j][3];
m2[j][4] = m1[j][4] + m1[j][5];
m2[j][5] = m1[j][4] - m1[j][5];
m2[j][6] = m1[j][6] + m1[j][7];
m2[j][7] = m1[j][6] - m1[j][7];
}
//vertical
for (i=0; i < 8; i++)
{
m3[0][i] = m2[0][i] + m2[4][i];
m3[1][i] = m2[1][i] + m2[5][i];
m3[2][i] = m2[2][i] + m2[6][i];
m3[3][i] = m2[3][i] + m2[7][i];
m3[4][i] = m2[0][i] - m2[4][i];
m3[5][i] = m2[1][i] - m2[5][i];
m3[6][i] = m2[2][i] - m2[6][i];
m3[7][i] = m2[3][i] - m2[7][i];
m1[0][i] = m3[0][i] + m3[2][i];
m1[1][i] = m3[1][i] + m3[3][i];
m1[2][i] = m3[0][i] - m3[2][i];
m1[3][i] = m3[1][i] - m3[3][i];
m1[4][i] = m3[4][i] + m3[6][i];
m1[5][i] = m3[5][i] + m3[7][i];
m1[6][i] = m3[4][i] - m3[6][i];
m1[7][i] = m3[5][i] - m3[7][i];
m2[0][i] = m1[0][i] + m1[1][i];
m2[1][i] = m1[0][i] - m1[1][i];
m2[2][i] = m1[2][i] + m1[3][i];
m2[3][i] = m1[2][i] - m1[3][i];
m2[4][i] = m1[4][i] + m1[5][i];
m2[5][i] = m1[4][i] - m1[5][i];
m2[6][i] = m1[6][i] + m1[7][i];
m2[7][i] = m1[6][i] - m1[7][i];
}
for (i = 0; i < 8; i++)
{
for (j = 0; j < 8; j++)
{
sad += abs(m2[i][j]);
}
}
sad=((sad+2)>>2);
return sad;
}
DST
DST主要用在4x4模式的亮度块上,使用过程如下:
1、变换量化处理进入transformNxN函数
2、transformNxN调用xT函数进行变换处理
3、xT调用xTrMxN函数完成变换:对4x4块,当uiMode != REG_DCT时调用fastForwardDst做DST,否则调用partialButterfly4做蝶形快速变换
// TU size ranges from 4 (minimum) to 32 (maximum)
// Excerpt: only the 4x4 case is shown. Each transform is applied as two passes,
// block -> tmp with shift_1st and then tmp -> coeff with shift_2nd.
void xTrMxN(Int bitDepth, Short *block,Short *coeff, Int iWidth, Int iHeight, UInt uiMode)
{
// ... (omitted)
if( iWidth == 4 && iHeight == 4)
{
if (uiMode != REG_DCT)
{
// Any mode other than REG_DCT: fast forward DST, applied twice
fastForwardDst(block,tmp,shift_1st); // Forward DST BY FAST ALGORITHM, block input, tmp output
fastForwardDst(tmp,coeff,shift_2nd); // Forward DST BY FAST ALGORITHM, tmp input, coeff output
}
else
{
// REG_DCT: 4-point butterfly transform, applied twice
partialButterfly4(block, tmp, shift_1st, iHeight);
partialButterfly4(tmp, coeff, shift_2nd, iWidth);
}
}
// ... (omitted)
}
使用蝶形快速变换实现的4点变换(对应xTrMxN中uiMode == REG_DCT的分支,即DCT;DST由fastForwardDst实现)
// 4-point forward butterfly transform using the g_aiT4 coefficient table
// (this is the routine xTrMxN calls on its REG_DCT branch).
// Processes `line` input rows of 4 samples each. The four outputs of input row
// r are written to dst[r], dst[line + r], dst[2*line + r] and dst[3*line + r],
// so the result is stored transposed. Each output is rounded by adding
// 1 << (shift-1) before the right shift.
void partialButterfly4(Short *src,Short *dst,Int shift, Int line)
{
  const Int rounding = 1<<(shift-1);

  for ( Int row = 0; row < line; row++, src += 4, dst++ )
  {
    // Even part: sums of the mirrored sample pairs; odd part: their differences
    const Int evenOuter = src[0] + src[3];
    const Int oddOuter  = src[0] - src[3];
    const Int evenInner = src[1] + src[2];
    const Int oddInner  = src[1] - src[2];

    dst[0]      = (g_aiT4[0][0]*evenOuter + g_aiT4[0][1]*evenInner + rounding)>>shift;
    dst[2*line] = (g_aiT4[2][0]*evenOuter + g_aiT4[2][1]*evenInner + rounding)>>shift;
    dst[line]   = (g_aiT4[1][0]*oddOuter  + g_aiT4[1][1]*oddInner  + rounding)>>shift;
    dst[3*line] = (g_aiT4[3][0]*oddOuter  + g_aiT4[3][1]*oddInner  + rounding)>>shift;
  }
}
量化
量化矩阵
1、使用量化矩阵的原因是,对不同位置的系数使用不同的量化步长,这样能提高视频的主观质量
2、量化矩阵作用于比例缩放过程,其大小和TU相同(从4x4到32x32)
3、HEVC定义了4x4和8x8两种大小的默认量化矩阵,并规定16x16、32x32量化矩阵可以由8x8量化矩阵上采样(重复使用8x8矩阵中的元素)得到
4、HEVC对量化矩阵中的元素使用差分编码!
量化矩阵的定义
// Default 4x4 matrix: flat, every entry is 16.
// Returned by getScalingListDefaultAddress for SCALING_LIST_4x4.
Int g_quantTSDefault4x4[16] =
{
16,16,16,16,
16,16,16,16,
16,16,16,16,
16,16,16,16
};
// Default 8x8 matrix for the "intra" list ids. getScalingListDefaultAddress returns
// it for listId < 3 at 8x8 and 16x16, and for listId < 1 at 32x32.
// Entries grow from 16 at the top-left to 115 at the bottom-right.
Int g_quantIntraDefault8x8[64] =
{
16,16,16,16,17,18,21,24,
16,16,16,16,17,19,22,25,
16,16,17,18,20,22,25,29,
16,16,18,21,24,27,31,36,
17,17,20,24,30,35,41,47,
18,19,22,27,35,44,54,65,
21,22,25,31,41,54,70,88,
24,25,29,36,47,65,88,115
};
// Default 8x8 matrix for the remaining ("inter") list ids at 8x8, 16x16 and 32x32.
// Entries grow from 16 at the top-left to 91 at the bottom-right.
Int g_quantInterDefault8x8[64] =
{
16,16,16,16,17,18,20,24,
16,16,16,17,18,20,24,25,
16,16,17,18,20,24,25,28,
16,17,18,20,24,25,28,33,
17,18,20,24,25,28,33,41,
18,20,24,25,28,33,41,54,
20,24,25,28,33,41,54,71,
24,25,28,33,41,54,71,91
};
量化矩阵的选择
在compressGOP中有一段代码是设置量化矩阵的
量化矩阵的来源有三种:
1、平坦的,使用平坦的量化矩阵(即,使用标量量化),TU中每个系数使用量化步长都一样
2、默认的,使用编码器提供的默认量化矩阵
3、自定义的,从文件中读取用户自定义的量化矩阵
// 如果没有使用缩放列表
if(m_pcEncTop->getUseScalingListId() == SCALING_LIST_OFF)
{
// 默认缩放列表是关闭的
// 进入
m_pcEncTop->getTrQuant()->setFlatScalingList();
m_pcEncTop->getTrQuant()->setUseScalingList(false);
m_pcEncTop->getSPS()->setScalingListPresentFlag(false);
m_pcEncTop->getPPS()->setScalingListPresentFlag(false);
}
// 使用了默认的缩放列表
else if(m_pcEncTop->getUseScalingListId() == SCALING_LIST_DEFAULT)
{
pcSlice->setDefaultScalingList ();
m_pcEncTop->getSPS()->setScalingListPresentFlag(false);
m_pcEncTop->getPPS()->setScalingListPresentFlag(false);
m_pcEncTop->getTrQuant()->setScalingList(pcSlice->getScalingList());
m_pcEncTop->getTrQuant()->setUseScalingList(true);
}
// 从文件中读取缩放列表
else if(m_pcEncTop->getUseScalingListId() == SCALING_LIST_FILE_READ)
{
if(pcSlice->getScalingList()->xParseScalingList(m_pcCfg->getScalingListFile()))
{
pcSlice->setDefaultScalingList ();
}
pcSlice->getScalingList()->checkDcOfMatrix();
m_pcEncTop->getSPS()->setScalingListPresentFlag(pcSlice->checkDefaultScalingList());
m_pcEncTop->getPPS()->setScalingListPresentFlag(false);
m_pcEncTop->getTrQuant()->setScalingList(pcSlice->getScalingList());
m_pcEncTop->getTrQuant()->setUseScalingList(true);
}
else
{
printf("error : ScalingList == %d no support\n",m_pcEncTop->getUseScalingListId());
assert(0);
}
为slice设置量化矩阵
以setDefaultScalingList函数为例,看看怎样为slice设置量化矩阵
// Installs the default matrix for every (size id, list id) pair: all size ids
// below SCALING_LIST_SIZE_NUM, and for each of them all list ids below
// g_scalingListNum[size id].
Void TComSlice::setDefaultScalingList()
{
  UInt uiSize = 0;
  while( uiSize < SCALING_LIST_SIZE_NUM )
  {
    for( UInt uiList = 0; uiList < g_scalingListNum[uiSize]; ++uiList )
    {
      getScalingList()->processDefaultMatrix( uiSize, uiList );
    }
    ++uiSize;
  }
}
调用processDefaultMatrix把量化矩阵保存到slice中
// Copies the default matrix for (sizeId, listId) into this scaling list and
// sets the matching DC value to SCALING_LIST_DC. The number of coefficients
// copied is capped at MAX_MATRIX_COEF_NUM.
Void TComScalingList::processDefaultMatrix(UInt sizeId, UInt listId)
{
  Int* const       dstMatrix = getScalingListAddress(sizeId, listId);
  const Int* const srcMatrix = getScalingListDefaultAddress(sizeId,listId);
  const Int        coefCount = min(MAX_MATRIX_COEF_NUM,(Int)g_scalingListSize[sizeId]);

  ::memcpy(dstMatrix, srcMatrix, sizeof(Int)*coefCount);
  setScalingListDC(sizeId,listId,SCALING_LIST_DC);
}
其中getScalingListDefaultAddress用于获取编码器定义的默认量化矩阵
// Returns the encoder's built-in default matrix for the given size and list id:
//   4x4          -> g_quantTSDefault4x4
//   8x8, 16x16   -> intra 8x8 table for listId < 3, inter 8x8 table otherwise
//   32x32        -> intra 8x8 table for listId < 1, inter 8x8 table otherwise
// Any other sizeId asserts and returns NULL.
Int* TComScalingList::getScalingListDefaultAddress(UInt sizeId, UInt listId)
{
  switch(sizeId)
  {
    case SCALING_LIST_4x4:
      return g_quantTSDefault4x4;

    case SCALING_LIST_8x8:
    case SCALING_LIST_16x16:
      if (listId < 3)
      {
        return g_quantIntraDefault8x8;
      }
      return g_quantInterDefault8x8;

    case SCALING_LIST_32x32:
      if (listId < 1)
      {
        return g_quantIntraDefault8x8;
      }
      return g_quantInterDefault8x8;

    default:
      break;
  }

  // Unsupported size id
  assert(0);
  return NULL;
}
通过上面的步骤之后,量化矩阵被保存到了TComSlice的m_scalingList成员中
为量化对象TComTrQuant设置量化矩阵
在compressGOP函数中为量化对象TComTrQuant对象设置量化矩阵,量化矩阵来自TComSlice的m_scalingList成员,这个成员在上一步中已经设置好了
// Call site: hand the slice's scaling list to the encoder's TComTrQuant object
m_pcEncTop->getTrQuant()->setScalingList(pcSlice->getScalingList());
// Derives this object's tables from `scalingList`. For every size id, every
// list id of that size and every qp value below SCALING_LIST_REM_NUM, it calls
// xSetScalingListEnc, xSetScalingListDec and setErrScaleCoeff, in that order.
Void TComTrQuant::setScalingList(TComScalingList *scalingList)
{
  for( UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; ++sizeId )
  {
    for( UInt listId = 0; listId < g_scalingListNum[sizeId]; ++listId )
    {
      for( UInt qpRem = 0; qpRem < SCALING_LIST_REM_NUM; ++qpRem )
      {
        xSetScalingListEnc(scalingList,listId,sizeId,qpRem);
        xSetScalingListDec(scalingList,listId,sizeId,qpRem);
        setErrScaleCoeff(listId,sizeId,qpRem);
      }
    }
  }
}
把量化矩阵的数据保存到TComTrQuant的m_quantCoef成员中
// Fills the quantization coefficient table for (listId, qp, sizeId) from the
// scaling list. The stored matrix is at most MAX_MATRIX_SIZE_NUM wide; `ratio`
// is how many times larger the block side is than the stored matrix side.
Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, UInt qp)
{
  // Width and height are both taken from the same table entry
  const UInt blockSide  = g_scalingListSizeX[sizeId];
  const Int  storedSide = min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
  const UInt ratio      = g_scalingListSizeX[sizeId]/storedSide;

  Int *srcMatrix = scalingList->getScalingListAddress(sizeId,listId); // scaling list (quantization matrix)
  Int *dstCoeff  = getQuantCoeff(listId, qp, sizeId);                 // destination coefficient table

  // Convert the matrix into quantization coefficients and store them
  processScalingListEnc(srcMatrix,dstCoeff,g_quantScales[qp]<<4,blockSide,blockSide,ratio,storedSide,scalingList->getScalingListDC(sizeId,listId));
}
// Builds a height x width table of quantization coefficients:
//   quantcoeff[row*width + col] = quantScales / coeff[source index]
// The source index maps each position back onto the stored matrix (row length
// sizuNum) by dividing the coordinates by `ratio`. When ratio > 1, entry 0 is
// overwritten with quantScales / dc.
Void TComTrQuant::processScalingListEnc( Int *coeff, Int *quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
{
  const Int rowScale = (height < width) ? 4: 1; // height ratio for NSQT
  const Int colScale = (width < height) ? 4: 1; // width ratio for NSQT

  for( UInt row = 0; row < height; ++row )
  {
    // Offsets shared by every column of this row
    const UInt dstRowBase = row*width;
    const UInt srcRowBase = sizuNum * (row * rowScale / ratio);

    for( UInt col = 0; col < width; ++col )
    {
      quantcoeff[dstRowBase + col] = quantScales / coeff[srcRowBase + col * colScale /ratio];
    }
  }

  // Larger blocks take their first coefficient from the separate DC value
  if( ratio > 1 )
  {
    quantcoeff[0] = quantScales / dc;
  }
}
使用量化矩阵
现在,TComTrQuant的m_quantCoef成员中已经保存了量化矩阵了,下面看看在量化过程中怎么样使用量化矩阵
在TComTrQuant::xQuant函数中有下面这样一段代码
// Excerpt from TComTrQuant::xQuant; elided code is marked "// ..."
Int *piQuantCoeff = 0;
// Fetch the coefficient table built from the quantization matrix
// (indexed by list type, m_cQP.m_iRem, and log2 transform size minus 2)
piQuantCoeff = getQuantCoeff(scalingListType,m_cQP.m_iRem,uiLog2TrSize-2);
// ...
iLevel = ((Int64)abs(iLevel) * piQuantCoeff[uiBlockPos] + iAdd ) >> iQBits; // core quantization step: |level| * coeff + offset, then right shift