Rewrite the following program so that it runs as an MPI program in the same way as the code above:

#include <mpi.h>
#include <cstdio>
#include <iostream>
#include <vector>
#include <cmath>
#include <algorithm>
const int n = 1389516;
const int TOTAL_ROWS = 8 * n;
// Tridiagonal matrix generation (local rows of A).
// Each row is stored with a fixed stride of 3 (sub-, main, super-diagonal),
// zero-padded at the first and last row of the global matrix so that the
// multiplication kernel can index rows as i * 3.
void generate_tridiagonal(std::vector<double>& data, int start_row, int end_row) {
    for (int i = start_row; i <= end_row; i++) {
        data.push_back(i > 0 ? 1.0 : 0.0);               // sub-diagonal   A[i][i-1]
        data.push_back(2.0);                             // main diagonal  A[i][i]
        data.push_back(i < TOTAL_ROWS - 1 ? 1.0 : 0.0);  // super-diagonal A[i][i+1]
    }
}
// Pentadiagonal matrix generation (local rows of B, five diagonals).
// Each row is stored with a fixed stride of 5 covering columns i-2 .. i+2,
// zero-padded where a column falls outside the matrix.
void generate_tridiagonal2(std::vector<double>& data, int start_row, int end_row) {
    for (int i = start_row; i <= end_row; i++) {
        data.push_back(i - 2 >= 0 ? 1.0 : 0.0);          // B[i][i-2]
        data.push_back(i - 1 >= 0 ? 2.0 : 0.0);          // B[i][i-1]
        data.push_back(3.0);                             // B[i][i]
        data.push_back(i + 1 < TOTAL_ROWS ? 2.0 : 0.0);  // B[i][i+1]
        data.push_back(i + 2 < TOTAL_ROWS ? 1.0 : 0.0);  // B[i][i+2]
    }
}
// Matrix multiplication kernel: C = A * B, where A is tridiagonal and B is
// pentadiagonal, so each row of C has at most 7 non-zeros (columns i-3 .. i+3):
//   C[i][j] = A[i][i-1]*B[i-1][j] + A[i][i]*B[i][j] + A[i][i+1]*B[i+1][j]
// B is stored with one ghost row above and one below the local block, so the
// B row with global index g lives at offset (g - start_row + 1) * 5.
void sparse_multiply(
    const std::vector<double>& A,   // local rows of the tridiagonal matrix, stride 3
    const std::vector<double>& B,   // local rows of the pentadiagonal matrix plus ghost rows, stride 5
    std::vector<double>& C,         // result rows, stride 7
    int start_row, int local_rows)
{
    for (int i = 0; i < local_rows; i++) {
        const int global_i = start_row + i;
        // Non-zeros of row global_i of A (zero-padded at the matrix boundary)
        double a_prev = A[i * 3];       // A[i][i-1], 0.0 for the first global row
        double a_curr = A[i * 3 + 1];   // A[i][i]
        double a_next = A[i * 3 + 2];   // A[i][i+1], 0.0 for the last global row
        // Result row: columns global_i-3 .. global_i+3 (7 diagonals)
        for (int j = std::max(0, global_i - 3); j <= std::min(TOTAL_ROWS - 1, global_i + 3); j++) {
            double sum = 0.0;
            // Contribution of B row global_i (covers columns global_i-2 .. global_i+2)
            int b_offset = (global_i - start_row + 1) * 5;
            if (j >= global_i - 2 && j <= global_i + 2)
                sum += a_curr * B[b_offset + (j - (global_i - 2))];
            // Contribution of B row global_i-1 (the ghost row above for the first local row)
            if (global_i > 0) {
                b_offset = (global_i - start_row) * 5;
                if (j >= global_i - 3 && j <= global_i + 1)
                    sum += a_prev * B[b_offset + (j - (global_i - 3))];
            }
            // Contribution of B row global_i+1 (the ghost row below for the last local row)
            if (global_i < TOTAL_ROWS - 1) {
                b_offset = (global_i - start_row + 2) * 5;
                if (j >= global_i - 1 && j <= global_i + 3)
                    sum += a_next * B[b_offset + (j - (global_i - 1))];
            }
            C[i * 7 + (j - (global_i - 3))] = sum;
        }
    }
}
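// Sanity check (hand-computed): for an interior row i, A's row is [1, 2, 1] and
// the three relevant B rows are each [1, 2, 3, 2, 1], so the 7 stored values of
// C's row i (columns i-3 .. i+3) should come out as
//   [1, 4, 8, 10, 8, 4, 1]
// e.g. C[i][i] = 1*B[i-1][i] + 2*B[i][i] + 1*B[i+1][i] = 2 + 6 + 2 = 10.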
int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    const int local_rows = TOTAL_ROWS / size;
    const int start_row = rank * local_rows;
    const int end_row = (rank == size - 1) ? TOTAL_ROWS - 1 : (rank + 1) * local_rows - 1;
    const int actual_rows = end_row - start_row + 1;
    // Local storage
    std::vector<double> A_data;                          // local rows of A, stride 3
    std::vector<double> B_data;                          // local rows of B, stride 5
    std::vector<double> C_data(actual_rows * 7, 0.0);    // at most 7 non-zeros per result row
    // Matrix generation timing
    double gen_start = MPI_Wtime();
    generate_tridiagonal(A_data, start_row, end_row);    // local block of A
    generate_tridiagonal2(B_data, start_row, end_row);   // local block of B
    double gen_time = MPI_Wtime() - gen_start;
    // Boundary (halo) exchange: the local row start_row needs B's row
    // (start_row - 1) from the upstream neighbour, and the local row end_row
    // needs B's row (end_row + 1) from the downstream neighbour. Boundary ranks
    // keep a zero row instead, so every rank ends up with the uniform layout
    // [ghost row above | local rows | ghost row below], 5 values per row.
    double comm_time = 0.0;
    {
        double comm_start = MPI_Wtime();
        const int up   = (rank > 0)        ? rank - 1 : MPI_PROC_NULL;
        const int down = (rank < size - 1) ? rank + 1 : MPI_PROC_NULL;
        std::vector<double> first_row(B_data.begin(), B_data.begin() + 5);
        std::vector<double> last_row(B_data.end() - 5, B_data.end());
        std::vector<double> ghost_above(5, 0.0);
        std::vector<double> ghost_below(5, 0.0);
        // Send the last local row downstream, receive the upstream neighbour's last row.
        MPI_Sendrecv(last_row.data(), 5, MPI_DOUBLE, down, 0,
                     ghost_above.data(), 5, MPI_DOUBLE, up, 0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        // Send the first local row upstream, receive the downstream neighbour's first row.
        MPI_Sendrecv(first_row.data(), 5, MPI_DOUBLE, up, 1,
                     ghost_below.data(), 5, MPI_DOUBLE, down, 1,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        B_data.insert(B_data.begin(), ghost_above.begin(), ghost_above.end());
        B_data.insert(B_data.end(), ghost_below.begin(), ghost_below.end());
        comm_time = MPI_Wtime() - comm_start;
    }
    // Local sparse matrix multiplication
    double comp_start = MPI_Wtime();
    sparse_multiply(A_data, B_data, C_data, start_row, actual_rows);
    double comp_time = MPI_Wtime() - comp_start;
    // Performance statistics: report the slowest rank for each phase
    double total_time = gen_time + comm_time + comp_time;
    double max_comm, max_comp, max_total;
    MPI_Reduce(&comm_time, &max_comm, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    MPI_Reduce(&comp_time, &max_comp, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    MPI_Reduce(&total_time, &max_total, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    // Report. A single run only measures one process count, so the speedup has
    // to be computed against the total time of a separate 1-process run.
    if (rank == 0) {
        std::cout << "procs | comm time(s) | compute time(s) | total time(s)\n";
        std::cout << "-----------------------------------------------------\n";
        printf("%5d | %12.4f | %15.4f | %13.4f\n",
               size, max_comm, max_comp, max_total);
    }
    MPI_Finalize();
    return 0;
}
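With a typical MPI installation (the file name sparse_mm.cpp below is just an example), the
program can be built with the standard compiler wrapper and launched with mpirun, e.g.:

    mpicxx -O2 -o sparse_mm sparse_mm.cpp
    mpirun -np 4 ./sparse_mm

Run it once with -np 1 to record the baseline total time, then with larger process counts;
the speedup is the baseline total divided by the total printed for each run.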