4*4的矩阵,每个数字的范围是[1, 4]。如果直接存储数字本身,每个数字至少需要3个bit,一共3*16=48个bit,既放不进32位整数,又用不满64位整数,比较尴尬。所以这里改用4个bit表示一个数字,一共4*16=64个bit,正好放进一个unsigned long long;虽然有冗余,但每个数字恰好对应一个16进制位,比3bit的表示更直观:
#include <cstdio>
// Board word for this version: 16 cells packed at 4 bits each, the cell in
// row r, column c starting at bit 4*(4*r + c).
typedef unsigned long long ULL;

// rowMask[r][c]: bits of the cell in row r, column c (filled by initMask).
ULL rowMask[4][4] = {{0}};
// wholeRowMask[r]: bits of all four cells of row r.
ULL wholeRowMask[4] = {0};
// colMask[c][r]: bits of the cell in column c, row r (filled by initMask).
ULL colMask[4][4] = {{0}};
// wholeColMask[c]: bits of all four cells of column c.
ULL wholeColMask[4] = {0};
/*
0 1 2 3
4 5 6 7
8 9 10 11
12 13 14 15
*/
void initMask()
{
for(int i = 0; i < 4; ++i){
for(int j = 0; j < 4; ++j){
rowMask[i][j] = static_cast<ULL>(0x0F) << ((4*i+j) << 2);
wholeRowMask[i] |= rowMask[i][j];
colMask[i][j] = static_cast<ULL>(0x0F) << ((i+4*j) << 2);
wholeColMask[i] |= colMask[i][j];
}
}
}
// True when rows 1..3, shifted down to row 0's bit position, each equal the
// bits selected by wholeRowMask[0] (i.e. all four rows are identical).
inline bool rowSame(ULL state)
{
    const ULL first = state & wholeRowMask[0];
    for (int r = 1; r < 4; ++r) {
        // A row spans 16 bits, so row r sits 16*r bits above row 0.
        const ULL aligned = (state & wholeRowMask[r]) >> (16 * r);
        if (aligned != first) {
            return false;
        }
    }
    return true;
}
// True when columns 1..3, shifted down to column 0's bit position, each equal
// the bits selected by wholeColMask[0] (i.e. all four columns are identical).
inline bool colSame(ULL state)
{
    const ULL first = state & wholeColMask[0];
    for (int c = 1; c < 4; ++c) {
        // Neighbouring columns are one cell (4 bits) apart.
        const ULL aligned = (state & wholeColMask[c]) >> (4 * c);
        if (aligned != first) {
            return false;
        }
    }
    return true;
}
// Returns `state` with row `row` rotated by one cell: the cells in columns
// 1..3 move to columns 0..2 and the cell in column 0 wraps to column 3.
inline ULL rotateLeft(ULL state, int row)
{
    const ULL* cell = rowMask[row];
    const ULL untouched = state & ~wholeRowMask[row];
    // Columns 1..3 all shift by the same amount, so mask them together.
    const ULL shifted = (state & (cell[1] | cell[2] | cell[3])) >> 4;
    const ULL wrapped = (state & cell[0]) << 12;
    return untouched | shifted | wrapped;
}
// Returns `state` with row `row` rotated by one cell: the cells in columns
// 0..2 move to columns 1..3 and the cell in column 3 wraps to column 0.
inline ULL rotateRight(ULL state, int row)
{
    const ULL* cell = rowMask[row];
    const ULL untouched = state & ~wholeRowMask[row];
    // Columns 0..2 all shift by the same amount, so mask them together.
    const ULL shifted = (state & (cell[0] | cell[1] | cell[2])) << 4;
    const ULL wrapped = (state & cell[3]) >> 12;
    return untouched | shifted | wrapped;
}
// Returns `state` with column `col` rotated by one cell: the cells in rows
// 1..3 move to rows 0..2 and the cell in row 0 wraps to row 3.
inline ULL rotateUp(ULL state, int col)
{
    const ULL* cell = colMask[col];
    const ULL untouched = state & ~wholeColMask[col];
    // One row is 16 bits wide, so moving up a row is a 16-bit right shift.
    const ULL shifted = (state & (cell[1] | cell[2] | cell[3])) >> 16;
    const ULL wrapped = (state & cell[0]) << 48;
    return untouched | shifted | wrapped;
}
// Returns `state` with column `col` rotated by one cell: the cells in rows
// 0..2 move to rows 1..3 and the cell in row 3 wraps to row 0.
inline ULL rotateDown(ULL state, int col)
{
    const ULL* cell = colMask[col];
    const ULL untouched = state & ~wholeColMask[col];
    // One row is 16 bits wide, so moving down a row is a 16-bit left shift.
    const ULL shifted = (state & (cell[0] | cell[1] | cell[2])) << 16;
    const ULL wrapped = (state & cell[3]) >> 48;
    return untouched | shifted | wrapped;
}
// Depth-limited search: returns true if some sequence of exactly `steps`
// row/column rotations turns `state` into a board for which rowSame or
// colSame holds.
bool dfs(ULL state, int steps)
{
    if (steps == 0) {
        return rowSame(state) || colSame(state);
    }

    // Spend one move on every row/column in each of the four directions.
    const int remaining = steps - 1;
    for (int line = 0; line < 4; ++line) {
        const bool solved = dfs(rotateLeft(state, line), remaining)
                         || dfs(rotateRight(state, line), remaining)
                         || dfs(rotateUp(state, line), remaining)
                         || dfs(rotateDown(state, line), remaining);
        if (solved) {
            return true;
        }
    }
    return false;
}
// Reads a test-case count, then for each case 16 integers forming a 4x4 grid
// in row-major order. Each grid is packed at 4 bits per cell and searched by
// iterative deepening: the smallest depth in 0..5 at which dfs succeeds is
// printed, or -1 when no depth up to 5 works.
// Input that ends early or is not numeric stops processing instead of
// continuing with uninitialized values.
int main()
{
    initMask();

    int test = 0;
    if (scanf("%d", &test) != 1) {
        return 0;  // no readable case count
    }

    // `test > 0` also terminates immediately on a negative count.
    for (; test > 0; --test) {
        ULL state = 0;
        for (int i = 0; i < 4; ++i) {
            for (int j = 0; j < 4; ++j) {
                int n = 0;
                if (scanf("%d", &n) != 1) {
                    return 0;  // truncated or malformed grid
                }
                // Keep only the cell's own 4 bits so an out-of-range value
                // cannot overwrite neighbouring cells.
                state |= (static_cast<ULL>(n) & 0x0F) << ((4 * i + j) << 2);
            }
        }

        int steps = 0;
        while (steps < 6 && !dfs(state, steps)) {
            ++steps;
        }
        printf("%d\n", steps < 6 ? steps : -1);
    }
    return 0;
}
实际上,如果把矩阵中的所有数字都减去1,范围就变为[0, 3],这样每个数字只需要2个bit,一共2*16=32个bit,刚好是一个32位unsigned int的长度:
#include <cstdio>
// Board word for this version: 16 cells packed at 2 bits each, the cell in
// row r, column c starting at bit 2*(4*r + c).
typedef unsigned int UINT;

// rowMask[r][c]: bits of the cell in row r, column c (filled by initMask).
UINT rowMask[4][4] = {{0}};
// wholeRowMask[r]: bits of all four cells of row r.
UINT wholeRowMask[4] = {0};
// colMask[c][r]: bits of the cell in column c, row r (filled by initMask).
UINT colMask[4][4] = {{0}};
// wholeColMask[c]: bits of all four cells of column c.
UINT wholeColMask[4] = {0};
/*
0 1 2 3
4 5 6 7
8 9 10 11
12 13 14 15
*/
void initMask()
{
for(int i = 0; i < 4; ++i){
for(int j = 0; j < 4; ++j){
rowMask[i][j] = static_cast<UINT>(0x03) << ((4*i+j) << 1);
wholeRowMask[i] |= rowMask[i][j];
colMask[i][j] = static_cast<UINT>(0x03) << ((i+4*j) << 1);
wholeColMask[i] |= colMask[i][j];
}
}
}
// True when rows 1..3, shifted down to row 0's bit position, each equal the
// bits selected by wholeRowMask[0] (i.e. all four rows are identical).
inline bool rowSame(UINT state)
{
    const UINT first = state & wholeRowMask[0];
    for (int r = 1; r < 4; ++r) {
        // A row spans 8 bits, so row r sits 8*r bits above row 0.
        const UINT aligned = (state & wholeRowMask[r]) >> (8 * r);
        if (aligned != first) {
            return false;
        }
    }
    return true;
}
// True when columns 1..3, shifted down to column 0's bit position, each equal
// the bits selected by wholeColMask[0] (i.e. all four columns are identical).
inline bool colSame(UINT state)
{
    const UINT first = state & wholeColMask[0];
    for (int c = 1; c < 4; ++c) {
        // Neighbouring columns are one cell (2 bits) apart.
        const UINT aligned = (state & wholeColMask[c]) >> (2 * c);
        if (aligned != first) {
            return false;
        }
    }
    return true;
}
// Returns `state` with row `row` rotated by one cell: the cells in columns
// 1..3 move to columns 0..2 and the cell in column 0 wraps to column 3.
inline UINT rotateLeft(UINT state, int row)
{
    const UINT* cell = rowMask[row];
    const UINT untouched = state & ~wholeRowMask[row];
    // Columns 1..3 all shift by the same amount, so mask them together.
    const UINT shifted = (state & (cell[1] | cell[2] | cell[3])) >> 2;
    const UINT wrapped = (state & cell[0]) << 6;
    return untouched | shifted | wrapped;
}
// Returns `state` with row `row` rotated by one cell: the cells in columns
// 0..2 move to columns 1..3 and the cell in column 3 wraps to column 0.
inline UINT rotateRight(UINT state, int row)
{
    const UINT* cell = rowMask[row];
    const UINT untouched = state & ~wholeRowMask[row];
    // Columns 0..2 all shift by the same amount, so mask them together.
    const UINT shifted = (state & (cell[0] | cell[1] | cell[2])) << 2;
    const UINT wrapped = (state & cell[3]) >> 6;
    return untouched | shifted | wrapped;
}
// Returns `state` with column `col` rotated by one cell: the cells in rows
// 1..3 move to rows 0..2 and the cell in row 0 wraps to row 3.
inline UINT rotateUp(UINT state, int col)
{
    const UINT* cell = colMask[col];
    const UINT untouched = state & ~wholeColMask[col];
    // One row is 8 bits wide, so moving up a row is an 8-bit right shift.
    const UINT shifted = (state & (cell[1] | cell[2] | cell[3])) >> 8;
    const UINT wrapped = (state & cell[0]) << 24;
    return untouched | shifted | wrapped;
}
// Returns `state` with column `col` rotated by one cell: the cells in rows
// 0..2 move to rows 1..3 and the cell in row 3 wraps to row 0.
inline UINT rotateDown(UINT state, int col)
{
    const UINT* cell = colMask[col];
    const UINT untouched = state & ~wholeColMask[col];
    // One row is 8 bits wide, so moving down a row is an 8-bit left shift.
    const UINT shifted = (state & (cell[0] | cell[1] | cell[2])) << 8;
    const UINT wrapped = (state & cell[3]) >> 24;
    return untouched | shifted | wrapped;
}
// Depth-limited search: returns true if some sequence of exactly `steps`
// row/column rotations turns `state` into a board for which rowSame or
// colSame holds.
bool dfs(UINT state, int steps)
{
    if (steps == 0) {
        return rowSame(state) || colSame(state);
    }

    // Spend one move on every row/column in each of the four directions.
    const int remaining = steps - 1;
    for (int line = 0; line < 4; ++line) {
        const bool solved = dfs(rotateLeft(state, line), remaining)
                         || dfs(rotateRight(state, line), remaining)
                         || dfs(rotateUp(state, line), remaining)
                         || dfs(rotateDown(state, line), remaining);
        if (solved) {
            return true;
        }
    }
    return false;
}
int main()
{
int test, i, j, n, steps;
UINT state;
initMask();
for(scanf("%d", &test); test; --test){
state = 0;
for(i = 0; i < 4; ++i){
for(j = 0; j < 4; ++j){
scanf("%d", &n);
state |= static_cast<UINT>(n-1) << ((4*i+j) << 1);
}
}
for(steps = 0; steps < 6; ++steps){
if(dfs(state, steps)) break;
}
printf("%d\n", steps < 6 ? steps : -1);
}
return 0;
}
可以看到速度快了大约一倍。由此推测,在该评测环境下CPU处理int型数据比处理long long快一倍左右:虽然在硬件电路上,64bit寄存器和32bit寄存器做位操作的耗时相差无几(各个bit是并行处理的),但如果程序被编译成32位代码,每次64bit的位运算或移位都要拆成多条32bit指令来完成,这可能就是差距的来源