hyd.modify

MPI通讯调试技巧

1. vi src/cuda/Makefile

开启verbose打印(enable verbose print):在Makefile中加入如下宏定义(行首的38是Makefile中的行号)

38 DEFINES += -DVERBOSE_PRINT


添加printf,打印MPI_Send/MPI_Recv的通信模式

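Idea: 在每个MPI_Send / MPI_Recv之前,打印sender/receiver在MPI_COMM_WORLD中的全局rank。由于HPL的通信发生在子通信域(comm)内,需要先用MPI_Group_translate_ranks把comm内的局部rank转换为全局rank(下面各代码片段行首的数字是原文件中的行号)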

./src/pgesv/HPL_spreadN.c

159    MPI_Group                 g1, basegroup;
160    int                       ranks[1], ranks_out[1];
161    char                      hostname[50];
203                   if( ierr == MPI_SUCCESS ) {
204 //                   MPI_Comm_rank(comm,&rank);
205                      MPI_Comm_rank(MPI_COMM_WORLD, &rank);
206                      gethostname(hostname, 50);
207                      ranks[0]=IPMAP[npm1-partner];
208                      MPI_Comm_group( comm, &g1 );
209                      MPI_Comm_group( MPI_COMM_WORLD, &basegroup );
210                      MPI_Group_translate_ranks(g1, 1, ranks, basegroup, ranks_out);
211                      printf("<HPL_spreadN> [%s]rank[%d] receives from left rank[%d]\n", hostname, rank, ranks_out[0]);
212                      ierr =   MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type,
213                                         IPMAP[npm1-partner], Cmsgid, comm,
214                                         &status );
215                   }
228                      MPI_Comm_rank(MPI_COMM_WORLD, &rank);
229                      gethostname(hostname, 50);
230                      ranks[0]=IPMAP[npm1-partner];
231                      MPI_Comm_group( comm, &g1 );
232                      MPI_Comm_group( MPI_COMM_WORLD, &basegroup );
233                      MPI_Group_translate_ranks(g1, 1, ranks, basegroup, ranks_out);
234                      printf("<HPL_spreadN> [%s]rank[%d] sends to left rank[%d]\n", hostname, rank, ranks_out[0]);
286                   if( ierr == MPI_SUCCESS ) {
288                      MPI_Comm_rank(MPI_COMM_WORLD, &rank);
289                      gethostname(hostname, 50);
290                      ranks[0]=IPMAP[SRCDIST+partner];
291                      MPI_Comm_group( comm, &g1 );
292                      MPI_Comm_group( MPI_COMM_WORLD, &basegroup );
293                      MPI_Group_translate_ranks(g1, 1, ranks, basegroup, ranks_out);
294                      printf("<HPL_spreadN> [%s]rank[%d] receives from right rank[%d]\n", hostname, rank, ranks_out[0]);
311                   if( ierr == MPI_SUCCESS ) {
313                      MPI_Comm_rank(MPI_COMM_WORLD, &rank);
314                      gethostname(hostname, 50);
315                      ranks[0]=IPMAP[SRCDIST+partner];
316                      MPI_Comm_group( comm, &g1 );
317                      MPI_Comm_group( MPI_COMM_WORLD, &basegroup );
318                      MPI_Group_translate_ranks(g1, 1, ranks, basegroup, ranks_out);
319                      printf("<HPL_spreadN> [%s]rank[%d] sends to right rank[%d]\n", hostname, rank, ranks_out[0]);
322                      ierr =   MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type, IPMAP[SRCDIST+partner], Cmsgid, comm );

./src/pgesv/HPL_rollN.c

声明变量

MPI_Group          g1, basegroup;
int                        ranks[1], ranks_out[1];
char                     hostname[50];

获取局部通信域comm对应的local group g1,以及MPI_COMM_WORLD对应的global group basegroup

comm = PANEL->grid->col_comm;
MPI_Comm_group( comm, &g1 );
MPI_Comm_group( MPI_COMM_WORLD, &basegroup );

在MPI_Send/MPI_Recv之前,把通信对端的local rank转换为相对应的global rank(注意:调用MPI_Group_translate_ranks之前必须先令 ranks[0]=partner,否则ranks[0]未初始化,转换结果无意义;下面的片段中省略了这一句)

if( ierr == MPI_SUCCESS ){
   MPI_Comm_rank(comm,&rank);
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   gethostname(hostname, 50);
   MPI_Group_translate_ranks(g1, 1, ranks, basegroup, ranks_out);
   printf("<HPL_rollN-Irecv> [%s]rank[%d] receives from rank[%d]\n", hostname, rank, ranks_out[0]);
   ierr =   MPI_Irecv( Mptr( U, ibufR, 0, LDU ), 1, type[I_RECV], partner, Cmsgid, comm, &request );
}

if( ierr == MPI_SUCCESS ){
   MPI_Comm_rank(comm,&rank);
   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   gethostname(hostname, 50);
   MPI_Group_translate_ranks(g1, 1, ranks, basegroup, ranks_out);
   printf("<HPL_rollN-Send> [%s]rank[%d] sends to rank[%d]\n", hostname, rank, ranks_out[0]);  //print global rank
//   printf("<HPL_rollN-Irecv> [%s]rank[%d] sends to rank[%d]\n", hostname, rank, partner);  //only print local rank
   ierr =   MPI_Send( Mptr( U, ibufS, 0, LDU ), 1, type[I_SEND],
                      partner, Cmsgid, comm );
}

./src/comm/HPL_1ring.c

MPI_Group    g1, basegroup;
int                 ranks[1], ranks_out[1], glo_rank,loc_rank;  //必须用单独的glo_rank保存全局rank:之前把MPI_COMM_WORLD中的rank直接写入了局部变量rank,覆盖了comm内的rank,引起后续计算错误
char              hostname[50];
153    if( rank == root )
154    {
155         gethostname(hostname, 50);
156         MPI_Comm_group( comm, &g1 );
157         MPI_Comm_rank(MPI_COMM_WORLD, &glo_rank);
158         ranks[0]=MModAdd1(rank, size);
159         MPI_Comm_group( MPI_COMM_WORLD, &basegroup );
160         MPI_Group_translate_ranks(g1, 1, ranks, basegroup, ranks_out);
161         printf("<HPL_1ring> rank[%d] sends to rank[%d]\n", glo_rank, ranks_out[0]);
162         ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( rank,
163                        size ), msgid, comm );
173          if( go != 0 )
174          {
175             MPI_Comm_rank(MPI_COMM_WORLD, &glo_rank);
176             MPI_Comm_group( comm, &g1 );
177             MPI_Comm_group( MPI_COMM_WORLD, &basegroup );
178             ranks[0]=prev;
179             MPI_Group_translate_ranks(g1, 1, ranks, basegroup, ranks_out);
180             printf("<HPL_1ring> rank[%d] receives from rank[%d]\n", glo_rank, ranks_out[0]);
181             ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, prev, msgid,
182                              comm, &PANEL->status[0] );
183             next = MModAdd1( rank, size );
184             if( ( ierr == MPI_SUCCESS ) && ( next != root ) )
185             {
186             ranks[0]=next;
187             MPI_Group_translate_ranks(g1, 1, ranks, basegroup, ranks_out);
188             printf("<HPL_1ring> rank[%d] sends to rank[%d]\n", glo_rank, ranks_out[0]);
189                ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next,
190                                 msgid, comm );


implicit none include 'genrl.h' include 'ufiles.h' c c Declarations. integer ientry character*8 ityp1,rtit1,rtit2 c c Execution. c copyright(1:21) = ' Copyright (C) 2001' copyright(22:64) = 'Information Systems Laboratories, Inc.' if (ientry .eq. 1) then ptitle(1:12) = ' RBIC/3.3' *if -def,ngentitl ptitle(13:24) = ' ' ptitle(25:64) = 'RELAP5 Based Integrated Code' *endif *if def,titlnrc * ptitle(13:22) = '#ge_nrc' * ptitle(23:64) = 'DEVELOPMENTAL RELAP5 Based Integrated Code' *endif *if -def,titlnrc else if (ientry .eq. 2) then if (ityp1 .eq. 'newslp') then ptitle(1:12) = ' SCDAP/3.2' ptitle(25:64) = 'Severe Accident Analysis Program ' else if (ityp1 .eq. 'new') then ptitle(1:12) = ' RELAP5/3.3' ptitle(25:64) = 'Reactor Loss Of Coolant Analysis Program' else if (ityp1(1:6) .eq. 'newath') then ptitle(1:12) = ' ATHENA/3.2' ptitle(25:64) = 'Adv. Therm. Hyd. Energy Network Analyzer' endif else if (ientry .eq. 3) then if (rtit1(1:8) .eq. ' SCDAP') then ptitle(1:12) = ' SCDAP/3.2' ptitle(25:64) = 'Severe Accident Analysis Program ' else if (rtit1(1:8) .eq. ' RELAP5') then ptitle(1:12) = ' RELAP5/3.3' ptitle(25:64) = 'Reactor Loss Of Coolant Analysis Program' else if (rtit1(1:8) .eq. ' ATHENA') then ptitle(1:12) = ' ATHENA/3.2' ptitle(25:64) = 'Adv. Therm. Hyd. Energy Network Analyzer' else write (output,2001) 2001 format ('0$$$$$$$$ WARNING, program name on restart file is not r *ecognized.') endif *endif *if def,titlnrc * else * write (output,2001) *2001 format ('0$$$$$$$$ WARNING, program name on restart file is not r * *ecognized.') *endif endif return end
最新发布
03-11
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值