GPP中的广播在设计上是容错的,也就是说,在广播过程中即使强制杀死若干进程,其余进程上的广播也不应受到影响。为了验证这一容错功能,编写了以下测试程序:
/*
* main.cpp
*
* Created on: Apr 9, 2013
* Author: lichao
*/
#include "gpp.h"
#include <cstdio>
#include <cstdlib>
#include <cstring>
class Packet : public GPP::Serializable
{
public:
Packet()
{
_buf = NULL;
_size = 0;
}
Packet(char * buf,int size)
{
_buf = (char *) malloc(size);
memcpy(_buf,buf,size);
_size = size;
}
virtual ~Packet()
{
if(!_buf)
{
free(_buf);
_buf = NULL;
}
}
void writeObject(GPP::DataStream& out)
{
out << _size;
out.writeRawData(_buf,_size);
}
void readObject(GPP::DataStream& in)
{
in >> _size;
_buf = (char *)malloc(_size);
in.readRawData(_buf,_size);
}
private:
char * _buf;
int _size;
};
/**
 * Broadcast handler for Packet objects.
 *
 * Each call to handle() bumps a counter shared by all Handler instances and
 * logs the new value; getCounter() exposes that count.
 */
class Handler : public GPP::BcastHandler<Packet>
{
public:
    Handler()
    {
    }

    virtual ~Handler()
    {
    }

    /** Counts one delivery and logs the running total; obj itself is not inspected. */
    void handle(GPP::SharedPtr<Packet>& obj)
    {
        GPP::Log::Info("Received data for %5d.",++counter);
        fflush(stdout);
    }

    /** Returns how many times handle() has run in this process so far. */
    static int getCounter()
    {
        return counter;
    }

private:
    static int counter; // shared by every Handler instance
};

// Storage for the shared delivery counter; starts at zero.
int Handler::counter = 0;
int main(int argc,char *argv[])
{
GPP::Init();
GPP::Comm::RegisterHandler(99,new Handler());
int size;
char * buf;
int rank = GPP::Comm::GetRank();
if(rank == 0)
{
size = 1 << 25;
buf = (char *)malloc(size);
Packet * p = new Packet(buf,size);
int loops = 100;
while(loops--)
{
int rc = GPP::Comm::Bcast(p,99);
if(rc < 0)
{
GPP::Log::Error(GPP::StrErr(rc));
fflush(stdout);
}
}
delete p;
}
GPP::MPI_Barrier(100);
GPP::Log::Trace("Counter is %5d\n",Handler::getCounter());
GPP::Finalize();
return 0;
}
程序通过计数器统计各进程中处理函数被调用的次数,以此验证广播的容错设计,即存活的进程不应出现丢包。
测试结果发现:强制杀死进程后,广播将挂起,即不能继续往下执行。这说明GPP中的广播在容错设计方面还有bug。

被折叠的 条评论
为什么被折叠?



