学习一下硬件级别的优化,测试结果和 Q_rsqrt 类似,非常奇怪,不解。
测试代码如下:
#include <stdio.h>
#include <time.h>
/*
 * Absolute value of a float computed by masking off the top bit of its
 * bit pattern instead of comparing against zero.
 *
 * The bits are reinterpreted through a union rather than a pointer cast:
 * reading a float object through an `int *` violates the C aliasing rules.
 *
 * NOTE(review): assumes float and unsigned int are both 32 bits wide and
 * that the top bit is the sign bit (IEEE-754 single) -- confirm for the
 * target platform.
 *
 * f: input value.
 * Returns f with the top bit of its representation cleared.
 */
float Q_fabs( float f ) {
    union {
        float        value;
        unsigned int bits;
    } pun;

    pun.value = f;
    pun.bits &= 0x7FFFFFFFu;   /* keep everything except the top bit */
    return pun.value;
}
/*
 * Inline variant of the bit-mask absolute value: clears the top bit of the
 * float's bit pattern.
 *
 * Declared `static inline`: in C99 and later a plain `inline` definition
 * does not provide an external definition, so any call the compiler chooses
 * not to inline would fail at link time.
 *
 * The bits are reinterpreted through a union instead of a pointer cast to
 * stay within the C aliasing rules.
 *
 * NOTE(review): assumes float and unsigned int are both 32 bits wide and
 * that the top bit is the sign bit (IEEE-754 single) -- confirm for the
 * target platform.
 *
 * f: input value.
 * Returns f with the top bit of its representation cleared.
 */
static inline float Q_fabs_inline( float f ) {
    union {
        float        value;
        unsigned int bits;
    } pun;

    pun.value = f;
    pun.bits &= 0x7FFFFFFFu;   /* keep everything except the top bit */
    return pun.value;
}
/*
 * Branch-based absolute value for float: negates the argument when it
 * compares less than zero, otherwise returns it unchanged.
 *
 * NOTE(review): this shares its name with the standard library function
 * declared in <math.h>, which has a different signature; compilers may warn
 * about the mismatch. Confirm that shadowing it here is intended.
 */
float fabs(float f)
{
    return (f < 0) ? -f : f;
}
/*
 * Inline, branch-based absolute value for float: negates the argument when
 * it compares less than zero, otherwise returns it unchanged.
 *
 * Declared `static inline`: in C99 and later a plain `inline` definition
 * does not provide an external definition, so any call the compiler chooses
 * not to inline would fail at link time.
 */
static inline float fabs_inline(float f)
{
    if (f < 0) {
        return -f;
    }
    return f;
}
/*
 * Absolute value of a double computed by masking off the top bit of its
 * bit pattern.
 *
 * Uses the standard `unsigned long long` instead of the compiler-specific
 * `__int64`, and reinterprets the bits through a union rather than a
 * pointer cast, which would violate the C aliasing rules.
 *
 * NOTE(review): assumes double and unsigned long long are both 64 bits wide
 * and that the top bit is the sign bit (IEEE-754 double) -- confirm for the
 * target platform.
 *
 * f: input value.
 * Returns f with the top bit of its representation cleared.
 */
double Q_fabs_double( double f ) {
    union {
        double             value;
        unsigned long long bits;
    } pun;

    pun.value = f;
    pun.bits &= 0x7FFFFFFFFFFFFFFFULL;   /* keep everything except the top bit */
    return pun.value;
}
/*
 * Inline, branch-based absolute value for double: negates the argument when
 * it compares less than zero, otherwise returns it unchanged.
 *
 * Declared `static inline`: in C99 and later a plain `inline` definition
 * does not provide an external definition, so any call the compiler chooses
 * not to inline would fail at link time.
 */
static inline double fabs_double_inline( double f ) {
    if (f < 0) {
        return -f;
    }
    return f;
}
/*
 * Benchmark driver: calls each absolute-value implementation sum*sum times
 * on the same input and prints the elapsed clock() ticks together with the
 * function's result.
 */
int main(void)
{
    float f_1, f_t;
    long lCount_1, lCount_2, sum;
    clock_t tStart, tEnd;   /* clock() returns clock_t, not time_t */

/*
 * Times sum*sum calls of `func` and prints one result line.
 *
 * Lines are continued with backslashes, and the format string uses %ld
 * because the counters are long; the clock_t difference is cast to long for
 * the same reason.
 *
 * NOTE(review): f_t is written but never read, so an optimizing compiler is
 * free to drop the whole loop when `func` can be inlined. That may be what
 * produces the "time:0" lines -- confirm by inspecting the generated code.
 */
#define TestABS(func) \
    do { \
        lCount_1 = sum + 1; \
        tStart = clock(); \
        while (--lCount_1) { \
            lCount_2 = sum + 1; \
            while (--lCount_2) \
                f_t = func(f_1); \
        } \
        tEnd = clock(); \
        printf("sum=%ld*%ld,\tfunction:" #func ",\ttime:%ld,\tresult=%f\n", \
               sum, sum, (long)(tEnd - tStart), func(f_1)); \
    } while (0)

    sum = 10000;
    f_1 = -0.123f;

    TestABS(Q_fabs);
    TestABS(Q_fabs_inline);
    TestABS(fabs);
    TestABS(fabs_inline);

#undef TestABS

    (void)f_t;   /* only assigned inside the timing loops */
    return 0;
}
测试结果:
sum=10000*10000, function:Q_fabs, time:801, result=0.123000
sum=10000*10000, function:Q_fabs_inline, time:0, result=0.123000
sum=10000*10000, function:fabs, time:981, result=0.123000
sum=10000*10000, function:fabs_inline, time:0, result=0.123000