问题:如何高效地构建一个螺旋矩阵?
前面的文章讨论了两种螺旋矩阵。当N比较小时,可以用模拟法(测试代码中的build_1a和build_1b函数),另外可以将4个for循环体合并到一个(build_2a,build_2b和build_2c函数)。但N比较大时,由于不断地对内存跳跃式访问,CPU cache line命中率很低,定位和载入内存的开销相当大。一种解决方法是,直接计算每个位置对应的值(build_3a和build_3b函数);另一种解决方法则是:将每行拆分成三部分,第一部分等于上一行同一列数值减1,中间部分是一段连续的递增或递减的数列(其起始和结束值可由公式算得),最后一部分的数等于上一行同一列数值加1(build_4函数)。为了测试方便,加了一个basic_build函数,先行后列填充1到N²的等差数列。
测试结果有点出乎意料,效率最高的basic_build、build_3a、build_3b和build_4这几个函数所用时间相当接近,其它几个函数的效率彼此间也相差不大。由于程序的性能瓶颈在于对内存访问的效率,二维数组的布局,CPU的缓存大小,内存页的大小等都对测试结果有很大影响,使得测试结果不精确。下面仅列出一个极端情况下的结果:
各种方法构建N*N矩阵所用时间(ms)
| 函数 | N=5119 | N=5120 | N=5121 |
| --- | --- | --- | --- |
| build_1a | 387 | 1259 | 331 |
| build_1b | 390 | 1259 | 331 |
| build_2a | 418 | 1259 | 325 |
| build_2b | 375 | 1256 | 312 |
| build_2c | 371 | 1187 | 312 |
| build_3a | 140 | 137 | 137 |
| build_3b | 134 | 134 | 134 |
| build_4 | 134 | 134 | 162 |
| basic | 134 | 131 | 134 |
测试代码:


#include < iostream >
#include < algorithm >
#include < vector >
#include < ctime >
#include < windows.h >
// NOTE(review): the #include directives above are written as `< iostream >`,
// with blanks inside the angle brackets. Some preprocessors (GCC, for one)
// treat those blanks as part of the header name, so the directives may need
// the blanks removed before this file builds there.

// Standard-library names used unqualified throughout this file.
using std::cout;
using std::min;
using std::vector;

// Side length of the statically allocated matrix; every builder below must be
// called with n <= N.
const int N = 5120;

// Shared N x N output buffer that all builders write into. It has static
// storage duration, so it starts out zero-initialised.
int arr[N][N];
// Baseline builder: writes 1, 2, ..., n*n into the top-left n x n corner of
// arr in row-major order (row by row, left to right).
// n must not exceed N; this is not checked.
void basic_build(int n)
{
    int next = 1;
    for (int row = 0; row < n; ++row) {
        int* const line = arr[row];
        for (int col = 0; col < n; ++col) {
            line[col] = next;
            ++next;
        }
    }
}
// Simulation builder: walks the clockwise spiral ring by ring with one running
// counter, writing 1..n*n into the top-left n x n corner of arr.
// Each ring is traced as four edges (top, right, bottom, left); every edge
// covers its starting corner and stops just before the next corner.
// n must not exceed N; this is not checked.
void build_1a(int n)
{
    const int rings = n / 2u;
    int value = 0;
    for (int ring = 0; ring < rings; ++ring) {
        const int edge = n - 1 - ring;  // last row/column index of this ring

        // Top edge, left to right.
        int pos = ring;
        while (pos < edge) {
            arr[ring][pos] = ++value;
            ++pos;
        }
        // Right edge, top to bottom.
        pos = ring;
        while (pos < edge) {
            arr[pos][edge] = ++value;
            ++pos;
        }
        // Bottom edge, right to left.
        pos = edge;
        while (pos > ring) {
            arr[edge][pos] = ++value;
            --pos;
        }
        // Left edge, bottom to top.
        pos = edge;
        while (pos > ring) {
            arr[pos][ring] = ++value;
            --pos;
        }
    }
    // An odd side length leaves a single centre cell after the last ring.
    if ((n & 1) != 0) {
        arr[rings][rings] = ++value;
    }
}
// Simulation builder, variant b: like build_1a it fills each ring with four
// separate edge loops, but the first value of every ring comes from the closed
// form 4*i*(n-i)+1 instead of a counter carried across rings.
// n must not exceed N; this is not checked.
void build_1b(int n)
{
    const int rings = n / 2u;
    for (int ring = 0; ring < rings; ++ring) {
        const int edge = n - 1 - ring;                    // last index of this ring
        const int side = edge - ring;                     // cells per edge
        const int first = 4 * ring * (n - ring) + 1;      // value at arr[ring][ring]

        // Top edge, left to right.
        for (int col = ring; col < edge; ++col) {
            arr[ring][col] = first + (col - ring);
        }
        // Right edge, top to bottom.
        for (int row = ring; row < edge; ++row) {
            arr[row][edge] = first + side + (row - ring);
        }
        // Bottom edge, right to left.
        for (int col = edge; col > ring; --col) {
            arr[edge][col] = first + 2 * side + (edge - col);
        }
        // Left edge, bottom to top.
        for (int row = edge; row > ring; --row) {
            arr[row][ring] = first + 3 * side + (edge - row);
        }
    }
    // Centre cell for odd n holds the largest value.
    if ((n & 1) != 0) {
        arr[rings][rings] = n * n;
    }
}
// Merged-loop builder, variant a: one loop per ring writes one cell of each of
// the four edges per iteration. The top and right edges are walked forwards
// while the bottom and left edges are walked backwards.
// n must not exceed N; this is not checked.
void build_2a(int n)
{
    const int rings = n / 2u;
    int base = 1;  // value stored at the top-left corner of the current ring
    for (int ring = 0; ring < rings; ++ring) {
        const int side = n - 1 - 2 * ring;  // cells per edge
        const int edge = n - 1 - ring;      // last index of this ring
        for (int step = 0; step < side; ++step) {
            const int fwd = ring + step;    // ascending index
            const int back = edge - step;   // descending index
            const int value = base + step;
            arr[ring][fwd] = value;               // top
            arr[fwd][edge] = value + side;        // right
            arr[edge][back] = value + 2 * side;   // bottom
            arr[back][ring] = value + 3 * side;   // left
        }
        base += 4 * side;  // a ring holds four edges of `side` cells each
    }
    if ((n & 1) != 0) {
        arr[rings][rings] = n * n;
    }
}
// Merged-loop builder, variant b: like build_2a, but all four edges are
// indexed with the same ascending position. The left and bottom edges are
// therefore written from the ring's last value downwards.
// n must not exceed N; this is not checked.
void build_2b(int n)
{
    const int rings = n / 2u;
    int base = 1;  // value stored at the top-left corner of the current ring
    for (int ring = 0; ring < rings; ++ring) {
        const int side = n - 1 - 2 * ring;  // cells per edge
        const int edge = n - 1 - ring;      // last index of this ring
        for (int step = 0; step < side; ++step) {
            const int at = ring + step;
            const int up = base + step;                   // ascending value
            const int down = base + 4 * side - 1 - step;  // descending value
            arr[ring][at] = up;                 // top
            arr[at][edge] = up + side;          // right
            arr[at + 1][ring] = down;           // left, filled top to bottom
            arr[edge][at + 1] = down - side;    // bottom, filled left to right
        }
        base += 4 * side;
    }
    if ((n & 1) != 0) {
        arr[rings][rings] = n * n;
    }
}
// Merged-loop builder, variant c: writes the four corners of each ring first,
// then fills the interior cells of all four edges in one loop that uses the
// same ascending position for every edge.
// n must not exceed N; this is not checked.
void build_2c(int n)
{
    const int rings = n / 2u;
    int base = 1;  // value stored at the top-left corner of the current ring
    for (int ring = 0; ring < rings; ++ring) {
        const int side = n - 1 - 2 * ring;  // cells per edge
        const int edge = n - 1 - ring;      // last index of this ring

        // Corners, in clockwise order starting at the top-left.
        arr[ring][ring] = base;
        arr[ring][edge] = base + side;
        arr[edge][edge] = base + 2 * side;
        arr[edge][ring] = base + 3 * side;

        // Edge interiors (corners excluded).
        for (int step = 1; step < side; ++step) {
            const int at = ring + step;
            const int up = base + step;               // ascending value
            const int down = base + 4 * side - step;  // descending value
            arr[ring][at] = up;             // top
            arr[at][edge] = up + side;      // right
            arr[at][ring] = down;           // left, filled top to bottom
            arr[edge][at] = down - side;    // bottom, filled left to right
        }
        base += 4 * side;
    }
    if ((n & 1) != 0) {
        arr[rings][rings] = n * n;
    }
}
// Direct-formula builder, variant a: visits arr in row-major order and
// computes every cell from its coordinates alone.
// Cells on or above the main diagonal count forwards from the first value of
// their ring; cells below it count backwards from the first value of the next
// ring inward.
// n must not exceed N; this is not checked.
void build_3a(int n)
{
    for (int row = 0; row < n; ++row) {
        for (int col = 0; col < n; ++col) {
            int value;
            if (col >= row) {
                const int ring = min(row, n - 1 - col);
                value = 4 * ring * (n - ring) + 1 + (row + col - ring * 2);
            } else {
                const int ring = min(col, n - 1 - row) + 1;
                value = 4 * ring * (n - ring) + 1 - (row + col - (ring - 1) * 2);
            }
            arr[row][col] = value;
        }
    }
}
// Direct-formula builder, variant b: uses the same per-cell formulas as
// build_3a, but splits each row at the main diagonal into two loops so the
// inner loops contain no branch.
// n must not exceed N; this is not checked.
void build_3b(int n)
{
    for (int row = 0; row < n; ++row) {
        int* const line = arr[row];
        int col = 0;
        // Columns left of the diagonal: count backwards from the next ring.
        for (; col < row; ++col) {
            const int ring = min(col, n - 1 - row) + 1;
            line[col] = 4 * ring * (n - ring) + 1 - (row + col - (ring - 1) * 2);
        }
        // Diagonal and everything to its right: count forwards in the ring.
        for (; col < n; ++col) {
            const int ring = min(row, n - 1 - col);
            line[col] = 4 * ring * (n - ring) + 1 + (row + col - ring * 2);
        }
    }
}
// Row-wise builder: fills arr strictly one row at a time. Each row after the
// first is made of three runs:
//   1. a left run equal to the cell directly above minus 1,
//   2. a middle run of consecutive values whose first value is given by a
//      closed form (ascending in the upper half, descending in the lower half),
//   3. a right run equal to the cell directly above plus 1.
// Rows must be produced top to bottom because runs 1 and 3 read the previous
// row. n must not exceed N; this is not checked.
void build_4(int n)
{
    // Row 0 is simply 1..n.
    for (int col = 0; col < n; ++col) {
        arr[0][col] = col + 1;
    }

    const int half = (n + 1) / 2u;

    // Upper half: the middle run ascends along the top edge of ring `row`.
    for (int row = 1; row < half; ++row) {
        const int* const above = arr[row - 1];
        int* const line = arr[row];
        int col = 0;
        while (col + 1 < row) {
            line[col] = above[col] - 1;
            ++col;
        }
        // Starts one column left of the ring's corner, i.e. at the last value
        // of the enclosing ring.
        int value = 4 * row * (n - row);
        const int stop = n - row;
        while (col < stop) {
            line[col] = value++;
            ++col;
        }
        while (col < n) {
            line[col] = above[col] + 1;
            ++col;
        }
    }

    // Lower half: the middle run descends along the bottom edge of ring `ring`.
    for (int row = half; row < n; ++row) {
        const int* const above = arr[row - 1];
        int* const line = arr[row];
        const int ring = n - 1 - row;
        int col = 0;
        while (col < ring) {
            line[col] = above[col] - 1;
            ++col;
        }
        // Value at the ring's bottom-left corner: ring start plus three edges.
        int value = 4 * ring * (n - ring) + 1 + 3 * (n - 1 - 2 * ring);
        while (col <= row) {
            line[col] = value--;
            ++col;
        }
        while (col < n) {
            line[col] = above[col] + 1;
            ++col;
        }
    }
}
// Writes the top-left n x n corner of arr to cout, one matrix row per output
// line. Every value is printed with a minimum field width of 3 and followed by
// a single space; each row, and the matrix as a whole, ends with " \n ".
void print(int n)
{
    for (int row = 0; row < n; ++row) {
        const int* const line = arr[row];
        for (int col = 0; col < n; ++col) {
            cout.width(3);  // applies to the next formatted insertion only
            cout << line[col];
            cout << " ";
        }
        cout << " \n ";
    }
    cout << " \n ";
}
// One benchmark entry: a printable label paired with the builder it names.
struct Func {
    // Label written to the output next to this entry's results.
    const char* name;
    // Builder to run; it receives the matrix side length.
    void (*func)(int);
};
// Runs every builder in pf, either to dump its output or to time it.
//
//   pf, len  table of builders and the number of entries in it
//   n        matrix side length handed to each builder; must lie in [0, N]
//            because arr is N x N. Out-of-range values are rejected.
//   count    number of timing rounds. A negative value selects dump mode:
//            each builder runs once and its matrix is printed, no timing.
//   M        number of back-to-back calls per builder inside one timed round.
//
// Timings are reported in clock() ticks (CLOCKS_PER_SEC ticks per second).
// The final "Result" section prints, per builder, the accumulated ticks
// divided by M * count.
void test(Func pf[], size_t len, int n, int count = 1, int M = 1)
{
    // The builders index arr[0..n-1][0..n-1] without any checks of their own.
    if (n < 0 || n > N) {
        std::cerr << "test: n=" << n << " is outside [0, " << N << "]\n";
        return;
    }

    if (count < 0) {
        for (size_t k = 0; k < len; ++k) {
            cout << pf[k].name << " :\n ";
            pf[k].func(n);
            print(n);
        }
        return;
    }

    vector<size_t> a(len, 0);  // accumulated ticks per builder

    // Untimed first pass over arr, presumably so the first timed builder does
    // not pay for paging the buffer in.
    basic_build(n);

    for (int k = 0; k < count; ++k) {
        for (size_t i = 0; i < len; ++i) {
            clock_t ta = clock();
            for (int j = 0; j < M; ++j) pf[i].func(n);
            ta = clock() - ta;
            // Streamed through cout like the rest of the file; emits the same
            // text as the former " %d %s %ld\n " format without needing
            // <cstdio> or assuming clock_t is long.
            cout << " " << n << " " << pf[i].name << " "
                 << static_cast<long>(ta) << "\n ";
            a[i] += static_cast<size_t>(ta);
        }
    }

    int total = M * count;
    if (total <= 0) return;  // nothing was timed, so there is nothing to average

    cout << " \nResult: " << n << " \n ";
    for (size_t k = 0; k < len; ++k)
        cout << pf[k].name << " " << a[k] / total << " \n ";
    cout << " \n ";
}
int main()
{
SYSTEM_INFO info;
GetSystemInfo( & info);
if (info.dwNumberOfProcessors >= 2 )
SetProcessAffinityMask( GetCurrentProcess(), 2 );
Func pf[] = {
{ " build_1a " , build_1a},
{ " build_1b " , build_1b},
{ " build_2a " , build_2a},
{ " build_2b " , build_2b},
{ " build_2c " , build_2c},
{ " build_3a " , build_3a},
{ " build_3b " , build_3b},
{ " build_4 " , build_4},
{ " basic " , basic_build},
};
const size_t sz = sizeof (pf) / sizeof (pf[ 0 ]);
// test(pf, sz, 5, -1);
// test(pf, sz, N, 5);
test(pf, sz, N, 1 , 5 );
}