MKL源码安装,支持多核环境配置
1. 安装包下载并解压
链接: Intel® Math Kernel Library (Intel® MKL).
新的改变
2. 安装
1). 解压至任意目录
2). 运行安装脚本, 默认安装至 /opt/, 可配置安装路径(建议默认安装)
$ ./install.sh
3). 在 /etc/ld.so.conf.d 下创建名为 intel-mkl.conf 的文件,内容为
/opt/intel/mkl/lib/intel64
/opt/intel/lib/intel64
然后执行
$ ldconfig -v
4). 执行
/opt/intel/mkl/bin/mklvars.sh intel64 mod
3. 环境变量设置
1) 进入startup文件编辑(bash shell)
$ vi ~/.bashrc
2)添加环境变量
export PATH=/opt/intel/bin:$PATH
export LD_LIBRARY_PATH=/opt/intel/lib/intel64:/opt/intel/mkl/lib/intel64:$LD_LIBRARY_P
source /opt/intel/compilers_and_libraries/linux/mkl/bin/mklvars.sh intel64
第三条语句用于 -lmkl_intel_thread库的调用实现多核并行计算
3)生效环境变量
$ source ~/.bashrc
4. 官方样例
dgemm_example.c
编译命令
gcc -I/opt/intel/mkl/include dgemm_example.c -lmkl_rt -L/opt/intel/mkl/lib/intel64 -L/opt/intel/lib/intel64
#define min(x,y) (((x) < (y)) ? (x) : (y))
#include <stdio.h>
#include <stdlib.h>
#include "mkl.h"
int main()
{
double *A, *B, *C;
int m, n, p, i, j;
double alpha, beta;
printf ("\n This example computes real matrix C=alpha*A*B+beta*C using \n"
" Intel(R) MKL function dgemm, where A, B, and C are matrices and \n"
" alpha and beta are double precision scalars\n\n");
m = 2000, p = 200, n = 1000;
printf (" Initializing data for matrix multiplication C=A*B for matrix \n"
" A(%ix%i) and matrix B(%ix%i)\n\n", m, p, p, n);
alpha = 1.0; beta = 0.0;
printf (" Allocating memory for matrices aligned on 64-byte boundary for better \n"
" performance \n\n");
A = (double *)mkl_malloc( m*p*sizeof( double ), 64 );
B = (double *)mkl_malloc( p*n*sizeof( double ), 64 );
C = (double *)mkl_malloc( m*n*sizeof( double ), 64 );
if (A == NULL || B == NULL || C == NULL) {
printf( "\n ERROR: Can't allocate memory for matrices. Aborting... \n\n");
mkl_free(A);
mkl_free(B);
mkl_free(C);
return 1;
}
printf (" Intializing matrix data \n\n");
for (i = 0; i < (m*p); i++) {
A[i] = (double)(i+1);
}
for (i = 0; i < (p*n); i++) {
B[i] = (double)(-i-1);
}
for (i = 0; i < (m*n); i++) {
C[i] = 0.0;
}
printf (" Computing matrix product using Intel(R) MKL dgemm function via CBLAS interface \n\n");
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
m, n, p, alpha, A, p, B, n, beta, C, n);
printf ("\n Computations completed.\n\n");
printf (" Top left corner of matrix A: \n");
for (i=0; i<min(m,6); i++) {
for (j=0; j<min(p,6); j++) {
printf ("%12.0f", A[j+i*p]);
}
printf ("\n");
}
printf ("\n Top left corner of matrix B: \n");
for (i=0; i<min(p,6); i++) {
for (j=0; j<min(n,6); j++) {
printf ("%12.0f", B[j+i*n]);
}
printf ("\n");
}
printf ("\n Top left corner of matrix C: \n");
for (i=0; i<min(m,6); i++) {
for (j=0; j<min(n,6); j++) {
printf ("%12.5G", C[j+i*n]);
}
printf ("\n");
}
printf ("\n Deallocating memory \n\n");
mkl_free(A);
mkl_free(B);
mkl_free(C);
printf (" Example completed. \n\n");
return 0;
}