#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include "veloc.h" // 包含VELOC的头文件
#include <assert.h>
/*
 * Minimal VELOC memory-based checkpoint demo.
 *
 * Usage: mpirun -np <N> ./a.out <veloc-config-file>
 *
 * Registers an iteration counter and two buffers of doubles with VELOC, then
 * runs a fixed number of iterations and requests a checkpoint on every tenth
 * one. Returns 0 on success; on any setup or checkpoint failure the MPI job
 * is aborted.
 */
int main(int argc, char **argv) {
    enum { NUM_ELEMS = 1024, NUM_ITERS = 100, CKPT_INTERVAL = 10 };
    int rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* The configuration file is the first command-line argument. Reading
       argv[2] when only one argument is given hands NULL to VELOC_Init. */
    if (argc < 2) {
        if (rank == 0) {
            fprintf(stderr, "usage: %s <veloc-config-file>\n", argv[0]);
        }
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    if (VELOC_Init(MPI_COMM_WORLD, argv[1]) != VELOC_SUCCESS) {
        fprintf(stderr, "Process %d failed to initialize VELOC\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return EXIT_FAILURE;
    }

    int i = 0;
    /* calloc so that the checkpointed bytes have a defined value. */
    double *h = calloc(NUM_ELEMS, sizeof *h);
    double *g = calloc(NUM_ELEMS, sizeof *g);
    if (h == NULL || g == NULL) {
        fprintf(stderr, "Process %d: out of memory\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return EXIT_FAILURE;
    }

    /* Register the three memory regions under ids 0, 1 and 2. */
    if (VELOC_Mem_protect(0, &i, 1, sizeof i) != VELOC_SUCCESS ||
        VELOC_Mem_protect(1, h, NUM_ELEMS, sizeof *h) != VELOC_SUCCESS ||
        VELOC_Mem_protect(2, g, NUM_ELEMS, sizeof *g) != VELOC_SUCCESS) {
        fprintf(stderr, "Process %d failed to register memory regions\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return EXIT_FAILURE;
    }

    /* NOTE(review): some VELOC documentation describes checkpoint names as
       alphanumeric; confirm the underscore in "yixin_test" is accepted by
       the installed version. */
    while (i < NUM_ITERS) {
        if (i % CKPT_INTERVAL == 0) {
            /* The call is kept out of assert(): building with NDEBUG would
               otherwise remove the checkpoint itself. */
            if (VELOC_Checkpoint("yixin_test", i) != VELOC_SUCCESS) {
                fprintf(stderr, "Process %d failed to checkpoint at iteration %d\n", rank, i);
                MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
                return EXIT_FAILURE;
            }
            printf("Process %d created a checkpoint at iteration %d\n", rank, i);
        }
        i++;
    }

    VELOC_Finalize(0);
    free(h);
    free(g);
    MPI_Finalize();
    return 0;
}
环境配置脚本(使用source命令执行):
#!/bin/bash
# VELOC environment setup. Intended to be sourced so that the exported
# variables and the working directory persist in the calling shell; on
# failure it returns (or exits, if executed directly).

# Create the working directory (no error if it already exists) and enter it.
mkdir -p work || { return 1 2>/dev/null || exit 1; }
cd work || { return 1 2>/dev/null || exit 1; }

# Write the configuration file. It is rewritten from scratch so that sourcing
# this script repeatedly does not append duplicate keys.
{
    echo scratch=./scratch
    echo persistent=./persistent
    echo mode=async
} > test.cfg

INSTALL_DIR=/home/yixin/veloc/v1/install
SOURCE_DIR=/home/yixin/veloc/v1/VELOC
# Misspelled name kept as an alias for anything that already reads it.
SOUCE_DIR=$SOURCE_DIR

# Prepend rather than overwrite, so libraries already on the search path
# remain reachable.
export LD_LIBRARY_PATH="$INSTALL_DIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export PATH="$INSTALL_DIR/bin:$PATH"
头文件位于install/include,库文件位于install/lib,服务器二进制可执行文件位于install/bin
2024/11/09
输入命令行:
mpicc main.c -L/home/username/software_install/veloc/veloc_install_dir/lib/ -I/home/username/software_install/veloc/veloc_install_dir/include/ -lveloc-client -lveloc-modules -laxl -laxl_mpi -ler -lkvtree -lkvtree_base -lrankstr -lredset -lredset_base -lshuffile
要把lib/目录下的所有库都通过-l选项指明,才能通过编译。
得到可执行文件a.out后,再创建一个配置文件/tmp/work/test.cfg:
scratch = /tmp/scratch
persistent = /tmp/persistent
mode = async
由于a.out是MPI程序,所以要用mpirun来启动:
mpirun -np 2 ./a.out /tmp/work/test.cfg
这样的启动方式会报错说无法打开共享链接库,找不到对象:
原因在于没有事先设置环境变量LD_LIBRARY_PATH。分两条命令,第一条设置环境变量,第二条再启动程序:
export LD_LIBRARY_PATH=/home/username/software_install/veloc/veloc_install_dir/lib
mpirun -np 2 ./a.out /tmp/work/test.cfg
报错:
报错:
本来想用gdb调试一下的,但是不知道怎么用gdb调试MPI程序。所以用最原始的方式,在源码中打印一些反馈信息:
//main.c
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include "veloc.h" // 包含VELOC的头文件
#include <assert.h>
//int main(int argc, char **argv) {
/*
 * VELOC memory-based checkpoint demo with diagnostic output.
 *
 * Usage: mpirun -np <N> ./a.out <veloc-config-file>
 *
 * Reports whether VELOC initialised, prints the addresses of the two
 * protected buffers, then runs a fixed number of iterations and requests a
 * checkpoint on every tenth one. Returns 0 on success; on any setup or
 * checkpoint failure the MPI job is aborted.
 */
int main(int argc, char **argv) {
    enum { NUM_ELEMS = 1024, NUM_ITERS = 100, CKPT_INTERVAL = 10 };
    int rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* The configuration file is the first command-line argument. Reading
       argv[2] when only one argument is given hands NULL to VELOC_Init. */
    if (argc < 2) {
        if (rank == 0) {
            fprintf(stderr, "usage: %s <veloc-config-file>\n", argv[0]);
        }
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    int rc = VELOC_Init(MPI_COMM_WORLD, argv[1]);
    if (rc != VELOC_SUCCESS) {
        printf("Process %d failed to initialize VELOC\n", rank);
        /* Nothing below can work without an initialised VELOC. */
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return EXIT_FAILURE;
    }
    printf("Process %d has successfully initialized VELOC\n", rank);

    int i = 0;
    /* calloc so that the checkpointed bytes have a defined value. */
    double *h = calloc(NUM_ELEMS, sizeof *h);
    double *g = calloc(NUM_ELEMS, sizeof *g);
    if (h == NULL || g == NULL) {
        fprintf(stderr, "Process %d: out of memory\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return EXIT_FAILURE;
    }
    /* %p requires void * arguments. */
    printf("the address of h is %p,and the address of g is %p\n", (void *)h, (void *)g);

    /* Register the three memory regions under ids 0, 1 and 2. */
    if (VELOC_Mem_protect(0, &i, 1, sizeof i) != VELOC_SUCCESS ||
        VELOC_Mem_protect(1, h, NUM_ELEMS, sizeof *h) != VELOC_SUCCESS ||
        VELOC_Mem_protect(2, g, NUM_ELEMS, sizeof *g) != VELOC_SUCCESS) {
        fprintf(stderr, "Process %d failed to register memory regions\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return EXIT_FAILURE;
    }

    while (i < NUM_ITERS) {
        if (i % CKPT_INTERVAL == 0) {
            /* The call is kept out of assert(): building with NDEBUG would
               otherwise remove the checkpoint itself. */
            if (VELOC_Checkpoint("yixin_test", i) != VELOC_SUCCESS) {
                fprintf(stderr, "Process %d failed to checkpoint at iteration %d\n", rank, i);
                MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
                return EXIT_FAILURE;
            }
            printf("Process %d created a checkpoint at iteration %d\n", rank, i);
        }
        i++;
    }

    VELOC_Finalize(0);
    free(h);
    free(g);
    MPI_Finalize();
    return 0;
}
按照之前的命令编译运行后,发现VELOC库的初始化就没有成功:
好像是VELOC_Init()的第二个参数传递得有问题:启动命令只传了一个参数(配置文件路径),它对应的是argv[1];如果代码里读取的是argv[2],得到的就是NULL。可以改用argv[1],这里干脆直接硬编码为配置文件的路径:
int rc = VELOC_Init(MPI_COMM_WORLD, "/tmp/work/test.cfg");
虽然还是报错,但是有了点进展。根据目前最新版的报错信息,大概能够定位到问题的位置了。还需要把VELOC_Init的实现看一下。
2024/11/20
重写脚本,在该脚本中完成新建工作目录、填写配置文件、设置环境变量、编译程序、运行程序。
#!/bin/bash
# Recreates the work directory and the VELOC configuration file, sets up the
# environment, compiles /tmp/test/main.c and runs the result under mpirun.
# Works both when executed and when sourced: failures use
# `return 1 2>/dev/null || exit 1`, so a sourced run does not close the shell.

# Start from a clean work directory.
if [ -d /tmp/work ]; then
    echo "Work directory already exists, removing it..."
    rm -rf /tmp/work
fi
echo "Creating a new work directory..."
mkdir -p /tmp/work && cd /tmp/work || {
    echo "cannot enter /tmp/work" >&2
    return 1 2>/dev/null || exit 1
}

# Write the configuration file in the work directory.
echo "Creating a new configuration file..."
{
    # Data is first written to the in-memory file system...
    echo scratch=/dev/shm/scratch
    # ...and then written to the on-disk file system in the background.
    echo persistent=/tmp/work/persistent
    echo mode=async
    #echo max_versions=1
} > test.cfg

INSTALL_DIR=/home/username/software_install/veloc/install
# Prepend rather than overwrite, so libraries already on the search path
# (for example the MPI runtime) remain reachable.
export LD_LIBRARY_PATH="$INSTALL_DIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export PATH="$INSTALL_DIR/bin:$PATH"
export VELOC_BIN="$INSTALL_DIR/bin"

# Remove a stale binary so a failed build cannot run old code.
if [ -e /tmp/test/a.out ]; then
    echo "removing old executable..."
    rm /tmp/test/a.out
fi

if ! mpicc /tmp/test/main.c -o /tmp/test/a.out \
        -L"$INSTALL_DIR/lib/" -I"$INSTALL_DIR/include/" \
        -lveloc-client -lveloc-modules -laxl -laxl_mpi -ler -lkvtree \
        -lkvtree_base -lrankstr -lredset -lredset_base -lshuffile; then
    echo "compilation failed"
    # Leave the caller in the source directory on failure as well.
    cd /tmp/test
    return 1 2>/dev/null || exit 1
fi
echo "compilation successfully done"

if [ -e /tmp/test/a.out ]; then
    echo "running..."
    mpirun -np 1 /tmp/test/a.out /tmp/work/test.cfg
else
    echo "executable not found"
fi

echo "returning to source code directory..."
cd /tmp/test
add a timer to the source code:
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include "veloc.h" // 包含VELOC的头文件
#include <assert.h>
#include <time.h>
/*
 * Times a single VELOC memory-based checkpoint of a 1 GiB buffer.
 *
 * Usage: mpirun -np <N> ./a.out [veloc-config-file]
 * The configuration path defaults to /tmp/work/test.cfg when no argument is
 * given.
 *
 * Returns 0 after finalizing VELOC and MPI; aborts the MPI job if VELOC
 * cannot be initialised, the buffer cannot be allocated, or the buffer
 * cannot be registered.
 */
int main(int argc, char **argv) {
    /* A byte count belongs in size_t, not in a floating-point type. */
    const size_t mem_size = (size_t)1024 * 1024 * 1024; /* 1 GiB */
    const char *cfg_path = (argc > 1) ? argv[1] : "/tmp/work/test.cfg";
    int rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int rc = VELOC_Init(MPI_COMM_WORLD, cfg_path);
    if (rc != VELOC_SUCCESS) {
        printf("VELOC_Init failed with error code %d\n", rc);
        MPI_Abort(MPI_COMM_WORLD, rc);
        return EXIT_FAILURE;
    }
    printf("VELOC_Init succeeded\n");

    /* calloc so that the checkpointed bytes have a defined value. */
    char *h = calloc(mem_size, sizeof *h);
    if (h == NULL) {
        printf("malloc failed\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
        return EXIT_FAILURE;
    }

    if (VELOC_Mem_protect(0, h, mem_size, sizeof *h) != VELOC_SUCCESS) {
        fprintf(stderr, "Process %d failed to register the buffer\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return EXIT_FAILURE;
    }

    /* MPI_Wtime() gives elapsed wall-clock time. clock() only counts CPU
       time, so time spent blocked in I/O would not be counted.
       NOTE(review): with mode=async this presumably covers only the blocking
       part of the checkpoint, not a background flush; confirm against the
       VELOC documentation. */
    double start = MPI_Wtime();
    if (VELOC_Checkpoint("yixin_test", 0) == VELOC_SUCCESS) {
        printf("Process %d created a checkpoint\n", rank);
    } else {
        printf("Process %d failed to create a checkpoint\n", rank);
    }
    double elapsed = MPI_Wtime() - start;

    printf("rank %d: Time used: %f seconds\n", rank, elapsed);
    printf("computing done!\nlet's finalize VELOC and then MPI\n");

    VELOC_Finalize(0);
    free(h);
    MPI_Finalize();
    return 0;
}
2024/11/26:使用文件接口
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include "veloc.h" // 包含VELOC的头文件
#include <assert.h>
#include <time.h>
#include <unistd.h>
#include <fcntl.h> // 包含open函数的声明
/*
 * Times writing a 1 GiB buffer to a file whose path is obtained from
 * VELOC_Route_file().
 *
 * Usage: mpirun -np <N> ./a.out [veloc-config-file]
 * The configuration path defaults to /tmp/work/test.cfg when no argument is
 * given.
 *
 * Returns 0 on success; aborts the MPI job when initialisation, allocation,
 * routing or any file operation fails.
 *
 * NOTE(review): VELOC's file-based mode is documented as bracketing
 * VELOC_Route_file() with VELOC_Checkpoint_begin()/VELOC_Checkpoint_end();
 * this program calls it on its own. Confirm that is intended.
 */
int main(int argc, char **argv) {
    /* A byte count belongs in size_t, not in a floating-point type. */
    const size_t mem_size = (size_t)1024 * 1024 * 1024; /* 1 GiB */
    const char *cfg_path = (argc > 1) ? argv[1] : "/tmp/work/test.cfg";
    const char *original_path = "/tmp/test/yixin_file";
    char veloc_file[VELOC_MAX_NAME];
    int rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int rc = VELOC_Init(MPI_COMM_WORLD, cfg_path);
    if (rc != VELOC_SUCCESS) {
        fprintf(stderr, "VELOC_Init failed with error code %d\n", rc);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return EXIT_FAILURE;
    }

    /* calloc so that the bytes written to the file have a defined value. */
    char *h = calloc(mem_size, sizeof *h);
    if (h == NULL) {
        fprintf(stderr, "rank %d: out of memory\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return EXIT_FAILURE;
    }

    /* MPI_Wtime() gives elapsed wall-clock time. clock() only counts CPU
       time, so time spent blocked in I/O would not be counted. */
    double start = MPI_Wtime();

    /* The call is kept out of assert(): building with NDEBUG would otherwise
       remove the routing call and leave veloc_file uninitialised. */
    if (VELOC_Route_file(original_path, veloc_file) != VELOC_SUCCESS) {
        fprintf(stderr, "rank %d: VELOC_Route_file failed\n", rank);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return EXIT_FAILURE;
    }

    int fd = open(veloc_file, O_RDWR | O_CREAT, 0666);
    if (fd < 0) {
        perror("open");
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return EXIT_FAILURE;
    }

    /* write() may transfer fewer bytes than requested, so loop until the
       whole buffer is written. */
    size_t written = 0;
    while (written < mem_size) {
        ssize_t n = write(fd, h + written, mem_size - written);
        if (n <= 0) {
            perror("write");
            close(fd);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
            return EXIT_FAILURE;
        }
        written += (size_t)n;
    }

    if (close(fd) != 0) {
        perror("close");
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        return EXIT_FAILURE;
    }
    double elapsed = MPI_Wtime() - start;

    /* Print the path that was actually routed instead of a separately typed
       (and previously misspelled) copy of it. */
    printf("the original path is %s\n", original_path);
    printf("the routed path is %s\n", veloc_file);
    printf("rank %d: Time used to write to %s is %f seconds\n", rank, veloc_file, elapsed);

    VELOC_Finalize(0);
    free(h);
    MPI_Finalize();
    return 0;
}