MPI其实是十分简单而又强大的并行库。只是这次让我花了半天的工夫才找出了一个微小的BUG,让我几乎崩溃。
原程序用于计算两个矩阵相乘。分配任务时,只把第一个矩阵分解传给若干个slave,第二个矩阵全传。虽然效率不高,但我的作业需要是把固定的矩阵大小改为可变的。
源代码:
#include
"
mpi.h
"
#include
<
stdio.h
>
#include
<
stdlib.h
>

#define
NRA 62 /* number of rows in matrix A */
#define
NCA 15 /* number of columns in matrix A */
#define
NCB 7 /* number of columns in matrix B */
#define
MASTER 0 /* taskid of first task */
#define
FROM_MASTER 1 /* setting a message type */
#define
FROM_WORKER 2 /* setting a message type */

int
main(argc,argv)
int
argc;
char
*
argv[];

...
{

int numtasks, /**//* number of tasks in partition */

taskid, /**//* a task identifier */

numworkers, /**//* number of worker tasks */

source, /**//* task id of message source */

dest, /**//* task id of message destination */

mtype, /**//* message type */

rows, /**//* rows of matrix A sent to each worker */

averow, extra, offset, /**//* used to determine rows sent to each worker */

i, j, k, rc; /**//* misc */

double a[NRA][NCA], /**//* matrix A to be multiplied */

b[NCA][NCB], /**//* matrix B to be multiplied */

c[NRA][NCB]; /**//* result matrix C */
MPI_Status status;

MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD,&taskid);
MPI_Comm_size(MPI_COMM_WORLD,&numtasks);

if (numtasks < 2 ) ...{
printf("Need at least two MPI tasks. Quitting... ");
MPI_Abort(MPI_COMM_WORLD, rc);
exit(1);
}
numworkers = numtasks-1;



/**//**************************** master task ************************************/
if (taskid == MASTER)

...{
printf("mpi_mm has started with %d tasks. ",numtasks);
printf("Initializing arrays... ");
for (i=0; i<NRA; i++)
for (j=0; j<NCA; j++)
a[i][j]= i+j;
for (i=0; i<NCA; i++)
for (j=0; j<NCB; j++)
b[i][j]= i*j;


/**//* Send matrix data to the worker tasks */
averow = NRA/numworkers;
extra = NRA%numworkers;
offset = 0;
mtype = FROM_MASTER;
for (dest=1; dest<=numworkers; dest++)

...{
rows = (dest <= extra) ? averow+1 : averow;
printf("Sending %d rows to task %d offset=%d ",rows,dest,offset);
MPI_Send(&offset, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
MPI_Send(&a[offset][0], rows*NCA, MPI_DOUBLE, dest, mtype,
MPI_COMM_WORLD);
MPI_Send(&b, NCA*NCB, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD);
offset = offset + rows;
}


/**//* Receive results from worker tasks */
mtype = FROM_WORKER;
for (i=1; i<=numworkers; i++)

...{
source = i;
MPI_Recv(&offset, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&c[offset][0], rows*NCB, MPI_DOUBLE, source, mtype,
MPI_COMM_WORLD, &status);
printf("Received results from task %d ",source);
}


/**//* Print results */
printf("****************************************************** ");
printf("Result Matrix: ");
for (i=0; i<NRA; i++)

...{
printf(" ");
for (j=0; j<NCB; j++)
printf("%6.2f ", c[i][j]);
}
printf(" ****************************************************** ");
printf ("Done. ");
}



/**//**************************** worker task ************************************/
if (taskid > MASTER)

...{
mtype = FROM_MASTER;
MPI_Recv(&offset, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&a, rows*NCA, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&b, NCA*NCB, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD, &status);

for (k=0; k<NCB; k++)
for (i=0; i<rows; i++)

...{
c[i][k] = 0.0;
for (j=0; j<NCA; j++)
c[i][k] = c[i][k] + a[i][j] * b[j][k];
}
mtype = FROM_WORKER;
MPI_Send(&offset, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
MPI_Send(&c, rows*NCB, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD);
}
MPI_Finalize();
}
改过的代码(已经加了很多调试输出):
#include
"
mpi.h
"
#include
<
stdio.h
>
#include
<
stdlib.h
>

#define
MASTER 0 /* taskid of first task */
#define
FROM_MASTER 1 /* setting a message type */
#define
FROM_WORKER 2 /* setting a message type */

int
main(
int
argc,
char
*
argv[])

...
{
int NRA;
int NCA;
int NCB;

int numtasks, /**//* number of tasks in partition */

taskid, /**//* a task identifier */

numworkers, /**//* number of worker tasks */

source, /**//* task id of message source */

dest, /**//* task id of message destination */

mtype, /**//* message type */

rows, /**//* rows of matrix A sent to each worker */

averow, extra, offset, /**//* used to determine rows sent to each worker */

i, j, k, rc; /**//* misc */

double * a, /**//* matrix A to be multiplied */

*b, /**//* matrix B to be multiplied */

*c; /**//* result matrix C */
MPI_Status status;

MPI_Init(&argc,&argv);
MPI_Comm_rank(MPI_COMM_WORLD,&taskid);
MPI_Comm_size(MPI_COMM_WORLD,&numtasks);
if (numtasks < 2 )

...{
printf("Need at least two MPI tasks. Quitting... ");
MPI_Abort(MPI_COMM_WORLD, rc);
exit(1);
}
numworkers = numtasks-1;



/**//**************************** master task ************************************/
if (taskid == MASTER)

...{
scanf("%d %d %d", &NRA, &NCA, &NCB);
a= new double[NRA*NCA];
b= new double[NCA*NCB];
c= new double[NRA*NCB];
for (int dest=1; dest<= numworkers; ++ dest)

...{
MPI_Send(&NRA, 1, MPI_INT, dest, FROM_MASTER, MPI_COMM_WORLD);
MPI_Send(&NCA, 1, MPI_INT, dest, FROM_MASTER, MPI_COMM_WORLD);
MPI_Send(&NCB, 1, MPI_INT, dest, FROM_MASTER, MPI_COMM_WORLD);
}
printf("mpi_mm has started with %d tasks. ",numtasks);
printf("Initializing arrays... ");
for (i=0; i<NRA; i++)
for (j=0; j<NCA; j++)
a[i*NCA+j]= i+j;
for (i=0; i<NCA; i++)
for (j=0; j<NCB; j++)
b[i*NCB+j]= i*j;

printf("****************************************************** ");
printf("A Matrix: ");
for (i=0; i<NRA; i++)

...{
printf(" ");
for (j=0; j<NCA; j++)
printf("%6.2f ", a[i*NCA+j]);
}
printf(" B Matrix: ");
for (i=0; i<NCA; i++)

...{
printf(" ");
for (j=0; j<NCB; j++)
printf("%6.2f ", b[i*NCB+j]);
}
printf(" ****************************************************** ");


/**//* Send matrix data to the worker tasks */
averow = NRA/numworkers;
extra = NRA%numworkers;
offset = 0;
mtype = FROM_MASTER;
for (dest=1; dest<=numworkers; dest++)

...{
rows = (dest <= extra) ? averow+1 : averow;
printf("Sending %d rows to task %d offset=%d apos %d. ",rows,dest,offset, &(a[offset*NCA]) );
MPI_Send(&offset, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
MPI_Send(&(a[offset*NCA]), rows*NCA, MPI_DOUBLE, dest, mtype,
MPI_COMM_WORLD);
MPI_Send(&b, NCA*NCB, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD);
offset = offset + rows;
}


/**//* Receive results from worker tasks */
mtype = FROM_WORKER;
for (i=1; i<=numworkers; i++)

...{
source = i;
MPI_Recv(&offset, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&(c[offset*NCB]), rows*NCB, MPI_DOUBLE, source, mtype,
MPI_COMM_WORLD, &status);
printf("Received results from task %d ",source);
}


/**//* Print results */
printf("****************************************************** ");
printf("Result Matrix: ");
for (i=0; i<NRA; i++)

...{
printf(" ");
for (j=0; j<NCB; j++)
printf("%6.2f ", c[i*NCB+j]);
}
printf(" ****************************************************** ");
printf ("Done. ");

delete []a;
delete []b;
delete []c;
}



/**//**************************** worker task ************************************/
if (taskid > MASTER)

...{
mtype = FROM_MASTER;

MPI_Recv(&NRA, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&NCA, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&NCB, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status);

printf("processor %d : NRA %d, NCA %d, NCB %d. ", taskid, NRA, NCA, NCB);
a= new double[NRA*NCA];
b= new double[NCA*NCB];
c= new double[NRA*NCB];
printf("a addr : %d on procs %d. ", &a, taskid);
if (a==NULL || b==NULL || c==NULL)

...{
printf("Allocated error on procs %d. ", taskid);
}

MPI_Recv(&offset, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status);
MPI_Recv(&rows, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status);

printf("processor %d : offset %d, rows %d. ", taskid, offset, rows);

MPI_Recv(&a, rows*NCA, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD, &status);

...{
int count;
MPI_Get_count(&status, MPI_DOUBLE, &count);
printf("recived %d data of a on procs %d, %d. ", count, taskid, *(a+2));
printf("a addr : %d on procs %d. ", &a, taskid);
}
MPI_Recv(&b, NCA*NCB, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD, &status);

...{
int count;
MPI_Get_count(&status, MPI_DOUBLE, &count);
printf("recived %d data of b on procs %d. ", count, taskid);
}

printf("******on processor %d ******************************** ", taskid);
printf("A Matrix: ");
for (i=0; i<NRA; i++)

...{
printf(" ");
for (j=0; j<NCA; j++)
printf("%6.2f ", a[i*NCA+j]);
}
printf(" B Matrix: ");
for (i=0; i<NCA; i++)

...{
printf(" ");
for (j=0; j<NCB; j++)
printf("%6.2f ", b[i*NCB+j]);
}
printf(" ****************************************************** ");

for (k=0; k<NCB; k++)
for (i=0; i<rows; i++)

...{
c[i*NCB+k] = 0.0;
for (j=0; j<NCA; j++)
c[i*NCB+k] = c[i*NCB+k] + a[i*NCA+j] * b[j*NCB+k];
}
mtype = FROM_WORKER;
MPI_Send(&offset, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
MPI_Send(&rows, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
MPI_Send(&c, rows*NCB, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD);

delete []a;
delete []b;
delete []c;
}
MPI_Finalize();
}
以上程序运行的唯一结果,就是segmentation fault(段错误)。
后在通过dbx工具在core中定位到printf("%6.2f ", a[i*NCA+j]);一句。经过分析,终于找到问题出现在由
double a[][] -> double *a = new [] 这样的转变中。
由于这样的转变,a变成了指针,因此使用Send或Receive时,就不能再使用 &a 作为第一个参数了,而是直接使用a。
程序这样修改后,终于能正常执行了。而我也可以继续下一个作业了。