1. CBLAS
* Also requires the BLAS library
TOPdir = /home/shir/mv/hpl
INCdir = $(TOPdir)/include
BINdir = $(TOPdir)/bin
LIBdir = $(TOPdir)/lib
HPLlib = $(LIBdir)/libhpl.a
MPdir = /home/shir/mv/mv2
MPinc = -I/home/shir/mv/mv2/include
MPlib = /home/shir/mv/mv2/lib/libmpich.a /home/shir/mv/mv2/lib/libmpichf90.a
LAdir = /home/shir/mv/BLAS /home/shir/mv/CBLAS
LAinc =
LAlib = /home/shir/mv/CBLAS/lib/cblas_LINUX.a /home/shir/mv/BLAS/blas_LINUX.a
F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lgfortran
HPL_OPTS = -DHPL_CALL_CBLAS
HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
CC = /home/shir/mv/mv2/bin/mpicc
CCNOOPT = $(HPL_DEFS)
CCFLAGS = $(HPL_DEFS)
#
LINKER = /home/shir/mv/mv2/bin/mpif77
LINKFLAGS =
#
ARCHIVER = ar
ARFLAGS = r
RANLIB = echo
2. GotoBLAS2
LAdir =
LAinc =
LAlib = /home/shir/mv/GotoBLAS2/libgoto2.a
F2CDEFS = -DAdd_
HPL_OPTS = -DHPL_COPY_L -DHPL_CALL_CBLAS -DHPL_DETAILED_TIMING
CC = /home/shir/mv/mv2/bin/mpicc
CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops
LINKER = /home/shir/mv/mv2/bin/mpif77
3. MKL
LAdir = /opt/intel/mkl
LAinc = -I$(LAdir)/include
LAlib = -L/opt/intel/mkl/lib/intel64 /opt/intel/mkl/lib/intel64/libmkl_intel_lp64.a -Wl,--start-group /opt/intel/mkl/lib/intel64/libmkl_sequential.a /opt/intel/mkl/lib/intel64/libmkl_core.a -Wl,--end-group \
        -L/opt/intel/mkl/lib/intel64 -lpthread -lm
# keep F2CDEFS blank
F2CDEFS =
CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops
LINKER = $(MPdir)/bin/mpicxx
4. CUDA
TOPdir = /home/shir/mv/hpl-2.0_FERMI_v13
# MPdir: the enable-shared version of mvapich2
MPdir = /home/shir/cmvapich2
MPinc = -I$(MPdir)/include
MPlib = $(MPdir)/lib/libmpich.so
LAdir = /opt/intel/mkl/lib/intel64
LAlib = -L $(TOPdir)/src/cuda -ldgemm -g -L/usr/local/cuda/lib64 -L/usr/lib64 -lcuda -lcudart -lcublas -L$(LAdir) -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -L/opt/intel/Compiler/11.1/069/lib/intel64 \
        -liomp5 -lpthread
F2CDEFS = -DAdd__ -DF77_INTEGER=int -DStringSunStyle
HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) -I/usr/local/cuda/include
HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib)
HPL_OPTS = -DCUDA
CC = $(MPdir)/bin/mpicc
# don't use -openmp
CCFLAGS = $(HPL_DEFS) -O3 -w -fomit-frame-pointer -funroll-loops -fopenmp
CCNOOPT = $(HPL_DEFS) -O0 -w
LINKER = $(MPdir)/bin/mpicxx
run_linpack注意
export LD_LIBRARY_PATH=$HPL_DIR/src/cuda:$LD_LIBRARY_PATH #保持原样
export MKL_NUM_THREADS=8 #改成数字8
module load intel/latest
LAdir = /opt/intel/composer_xe_2011_sp1.9.293/mkl/lib/intel64
LAlib = -L $(TOPdir)/src/cuda -ldgemm -g -L/usr/local/cuda/lib64 -L/usr/lib64 -lcuda -lcudart -lcublas -L$(LAdir) -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -L/opt/intel/composer_xe_2011_sp1.9.293/compiler/lib/intel64 -liomp5 -lpthread
Running Command
module load cuda intel
~/cmvapich2/bin/mpirun_rsh -np 2 -hostfile hosts MV2_ENABLE_AFFINITY=0 ./run_linpack
Check status
nvidia-smi #information for NVIDIA GPU
cat /proc/cpuinfo | less # or meminfo
ssh gpu01 --> top --> 1 # CPU usage
continued