1. Blocking Communication: When It Happens and How to Handle It
MPI point-to-point (P2P) communication comes in two modes:
a. Blocking
MPI_Send and MPI_Recv do not return until their operation is complete: MPI_Send returns only after the message has been sent (or stored in a system buffer), and MPI_Recv returns only after the message has been received (the data has been copied into the receive buffer). Until then, each call keeps the process "stalled" at that point.
b. Non-blocking
The send and receive calls return immediately, and the communication proceeds in the background. Functions such as MPI_Test or MPI_Wait must then be used to determine whether the communication has completed. A minimal sketch contrasting the two modes is shown below.
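The following sketch is not part of the random-walk program below; it is only an illustration under some assumptions (two processes, a single int buffer, tag 0). Rank 0 uses a blocking MPI_Send, while rank 1 posts a non-blocking MPI_Irecv and waits on it later.

#include <mpi.h>

// Run with at least two processes, e.g. mpirun -np 2 ./a.out
int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  int data = 0;
  if (rank == 0) {
    data = 42;
    // Blocking: does not return until the message is sent or buffered by MPI.
    MPI_Send(&data, 1, MPI_INT, 1, 0, MPI_COMM_WORLD);
  } else if (rank == 1) {
    // Non-blocking: returns immediately and hands back a request handle.
    MPI_Request request;
    MPI_Irecv(&data, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &request);
    // ... other work could overlap with the communication here ...
    MPI_Wait(&request, MPI_STATUS_IGNORE);  // block until the data has actually arrived
  }
  MPI_Finalize();
  return 0;
}

Instead of MPI_Wait, MPI_Test(&request, &flag, &status) can be called repeatedly to poll for completion without blocking.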
2. A Point-to-Point Communication Example: Random Walk
Problem statement: given a left boundary Min, a right boundary Max, and a walker W, the walker moves to the right in random steps of arbitrary length S. If it crosses the boundary, it wraps around to the beginning. How do we parallelize the random walk problem?
First, partition the domain across the processes, defining the domain size as Max - Min + 1 (since the walk includes both Max and Min). Assuming the walker can only take integer-sized steps, the domain is easily split into chunks of nearly equal size across the processes. For example, if Min is 0, Max is 20, and there are four processes, the domain is split as follows.
[Decomposition figure: process 0 owns cells 0-4, process 1 owns 5-9, process 2 owns 10-14, and process 3 owns 15-20.] The first three subdomains have five cells each, and the last one has six. For example, if a walker on process 0 (using the decomposition above) takes a walk with a total length of six, its execution proceeds as follows:
- The walker starts taking incremental steps. When its position reaches 4, however, it has hit the boundary of process 0's subdomain, so process 0 must communicate the walker's information to process 1.
- Process 1 receives the walker and continues walking until it has completed the total of six moves. The walker may then begin a new random walk.
In this example, W only required a single communication, from process 0 to process 1.
#include <iostream>
#include <mpi.h>
#include <vector>
#include <cstdlib>
#include <time.h>
using namespace std;
void decompose_domain (int domain_size, int world_rank, int world_size,
int * subdomain_start, int * subdomain_size) {
  /* Split the domain into chunks of (nearly) equal size; return the subdomain
     start and size through the output pointers. */
if (world_size > domain_size) {
MPI_Abort(MPI_COMM_WORLD, 1);
}
*subdomain_start = domain_size / world_size * world_rank;
*subdomain_size = domain_size / world_size;
if (world_rank == world_size - 1) {
*subdomain_size += domain_size % world_size;
}
}
typedef struct {
int location;
int num_step_left_in_walk;
} Walker;
void initialize_walker (int num_walker_per_proc, int max_walk_size, int subdomain_start,
vector<Walker> *incoming_walkers) {
Walker walker;
for (int i = 0; i < num_walker_per_proc; i++) {
    // Initialize each walker at the start of its subdomain
walker.location = subdomain_start;
walker.num_step_left_in_walk = (rand() / (float)RAND_MAX) * max_walk_size;
incoming_walkers->push_back(walker);
}
}
void walk(Walker* walker, int subdomain_start, int subdomain_size,
int domain_size, vector<Walker>* outgoing_walkers) {
while (walker->num_step_left_in_walk > 0) {
if (walker->location >= subdomain_start + subdomain_size) {
// Take care of the case when the walker is at the end
// of the domain by wrapping it around to the beginning
if (walker->location == domain_size) {
walker->location = 0;
}
outgoing_walkers->push_back(*walker);
break;
} else {
walker->num_step_left_in_walk--;
walker->location++;
}
}
}
void send_outgoing_walkers(vector<Walker>* outgoing_walkers,
int world_rank, int world_size) {
// Send the data as an array of MPI_BYTEs to the next process.
// The last process sends to process zero.
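  // (Sending the Walker structs as raw MPI_BYTEs avoids having to define a
  //  custom MPI datatype for the struct.)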
MPI_Send((void*)outgoing_walkers->data(),
outgoing_walkers->size() * sizeof(Walker), MPI_BYTE,
(world_rank + 1) % world_size, 0, MPI_COMM_WORLD);
// Clear the outgoing walkers list
outgoing_walkers->clear();
}
void receive_incoming_walkers(vector<Walker>* incoming_walkers,
int world_rank, int world_size) {
// Probe for new incoming walkers
MPI_Status status;
// Receive from the process before you. If you are process zero,
// receive from the last process
int incoming_rank = (world_rank == 0) ? world_size - 1 : world_rank - 1;
MPI_Probe(incoming_rank, 0, MPI_COMM_WORLD, &status);
// Resize your incoming walker buffer based on how much data is
// being received
int incoming_walkers_size;
MPI_Get_count(&status, MPI_BYTE, &incoming_walkers_size);
incoming_walkers->resize(incoming_walkers_size / sizeof(Walker));
MPI_Recv((void*)incoming_walkers->data(), incoming_walkers_size,
MPI_BYTE, incoming_rank, 0, MPI_COMM_WORLD,
MPI_STATUS_IGNORE);
}
int main(int argc, char** argv) {
int domain_size;
int max_walk_size;
int num_walkers_per_proc;
if (argc < 4) {
cerr << "Usage: random_walk domain_size max_walk_size "
<< "num_walkers_per_proc" << endl;
exit(1);
}
domain_size = atoi(argv[1]);
max_walk_size = atoi(argv[2]);
num_walkers_per_proc = atoi(argv[3]);
MPI_Init(NULL, NULL);
int world_size;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
  srand(time(NULL) * (world_rank + 1));  // seed each rank differently (offset so rank 0 isn't always seeded with 0)
int subdomain_start, subdomain_size;
vector<Walker> incoming_walkers, outgoing_walkers;
// Find your part of the domain
decompose_domain(domain_size, world_rank, world_size,
&subdomain_start, &subdomain_size);
// Initialize walkers in your subdomain
initialize_walker(num_walkers_per_proc, max_walk_size, subdomain_start,
&incoming_walkers);
cout << "Process " << world_rank << " initiated " << num_walkers_per_proc
<< " walkers in subdomain " << subdomain_start << " - "
<< subdomain_start + subdomain_size - 1 << endl;
// Determine the maximum amount of sends and receives needed to
// complete all walkers
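  // A walker takes at most max_walk_size steps and every subdomain holds at
  // least domain_size / world_size cells, so a walker can cross at most that
  // many subdomain boundaries; the +1 rounds the integer division up.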
int maximum_sends_recvs = max_walk_size / (domain_size / world_size) + 1;
for (int m = 0; m < maximum_sends_recvs; m++) {
// Process all incoming walkers
for (int i = 0; i < incoming_walkers.size(); i++) {
walk(&incoming_walkers[i], subdomain_start, subdomain_size,
domain_size, &outgoing_walkers);
}
cout << "Process " << world_rank << " sending " << outgoing_walkers.size()
<< " outgoing walkers to process " << (world_rank + 1) % world_size
<< endl;
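    // Alternate the order of the blocking send and receive by rank parity:
    // if every process called MPI_Send first, the sends could all wait on
    // each other and deadlock, so even ranks send first while odd ranks
    // receive first.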
if (world_rank % 2 == 0) {
// Send all outgoing walkers to the next process.
send_outgoing_walkers(&outgoing_walkers, world_rank,
world_size);
// Receive all the new incoming walkers
receive_incoming_walkers(&incoming_walkers, world_rank,
world_size);
} else {
// Receive all the new incoming walkers
receive_incoming_walkers(&incoming_walkers, world_rank,
world_size);
// Send all outgoing walkers to the next process.
send_outgoing_walkers(&outgoing_walkers, world_rank,
world_size);
}
cout << "Process " << world_rank << " received " << incoming_walkers.size()
<< " incoming walkers" << endl;
}
cout << "Process " << world_rank << " done" << endl;
MPI_Finalize();
return 0;
}
// Build and run:
// mpic++ random_walk.cc -o random_walk
// mpirun -np 5 ./random_walk 100 500 20
The next section will cover collective communication.