BLAS库学习

最新推荐文章于 2025-06-28 09:23:42 发布

原创最新推荐文章于 2025-06-28 09:23:42 发布 · 1.9w 阅读

16 ·

CC 4.0 BY-SA版权

文章标签：

#matrix #c #vector #random #each #编译器

MinGW 专栏收录该内容

4 篇文章

订阅专栏

本文介绍BLAS不同级别的矩阵运算函数，并通过一个示例程序演示如何使用这些函数进行矩阵乘法计算。比较了直接使用C代码与调用BLAS函数在效率上的差异。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

首先，命名规范：

There are three levels of BLAS operations,

Level 1

Vector operations, e.g. y = \alpha x + y 向量操作

Level 2

Matrix-vector operations, e.g. y = \alpha A x + \beta y 矩阵与向量操作

Level 3

Matrix-matrix operations, e.g. C = \alpha A B + C 矩阵与矩阵的操作

Each routine has a name which specifies the operation, the type of matrices involved and their precisions. Some of the most common operations and their names are given below,

DOT

scalar product, x^T y

AXPY

vector sum, \alpha x + y

MV

matrix-vector product, A x

SV

matrix-vector solve, inv(A) x

MM

matrix-matrix product, A B

SM

matrix-matrix solve, inv(A) B

The types of matrices are,

GE

general

GB

general band

SY

symmetric

SB

symmetric band

SP

symmetric packed

HE

hermitian

HB

hermitian band

HP

hermitian packed

TR

triangular

TB

triangular band

TP

triangular packed

Each operation is defined for four precisions,

S

single real

D

double real

C

single complex

Z

double complex

Thus, for example, the name SGEMM stands for "single-precision general matrix-matrix multiply" and ZGEMM stands for "double-precision complex matrix-matrix multiply".

因此，例如，命名为SGEMM的函数意思为“单精度普通矩阵乘法”，ZGEMM为“双精度复数矩阵乘法”。

更多的，可以参考：http://www.netlib.org/blas/blasqr.pdf

下面这个例子是在csdn论坛上看到的，简单地改一下，可用GotoBlas2调用成功。

程序说明：下面的matrix.c 文件分别调用 C 代码， BLAS Level 1 函数 (ddot), BLAS Level 2 函数(dgemv) 与 BLAS Level 3的函数(DGEMM)完成矩阵计算： Yours_multiply 是 C 源代码，它直接依赖编译器生成优化代码。Ddot_Multiply，Dgemv_multiply使用Gotoblas2调用实现部分矩阵运算。Dgemm_multiply 直接调用GotoBlas2 的矩阵计算函数。

//Simple minded matrix multiply #include <stdio.h> #include <time.h> #include <stdlib.h> #include <common.h> //GotoBlas2 #include <cblas.h> //GotoBlas2 void print_arr(int N, char * name, double* array); void init_arr(int N, double* a); void Dgemm_multiply(double* a,double* b,double* c, int N); void Dgemv_multiply(double* a,double* b,double* c, int N); void Ddot_Multiply(double* a,double* b,double* c, int N); void Yours_multiply(double* a,double* b,double* c, int N); int main(int argc, char* argv[]) { clock_t start, stop; int i, j; int N; double* a; double* b; double* c; if(argc < 2) { printf("Enter matrix size N="); //please enter small number first to ensure that the //multiplication is correct! and then you may enter //a "reasonably" large number say like 500 or even 1000 scanf("%d",&N); } else { N = atoi(argv[1]); } a=(double*) malloc( sizeof(double)*N*N ); b=(double*) malloc( sizeof(double)*N*N ); c=(double*) malloc( sizeof(double)*N*N ); init_arr(N,a); init_arr(N,b); start = clock(); Yours_multiply(a,b,c,N); stop = clock(); printf("roll_your_own_multiply(). Elapsed time = %g seconds/n", ((double)(stop - start)) / CLOCKS_PER_SEC); //print simple test case of data to be sure multiplication is correct if (N < 7) { print_arr(N,"a", a); print_arr(N,"b", b); print_arr(N,"c", c); } free(a); free(b); free(c); //DDOT multiply a=(double*) malloc( sizeof(double)*N*N ); b=(double*) malloc( sizeof(double)*N*N ); c=(double*) malloc( sizeof(double)*N*N ); init_arr(N,a); init_arr(N,b); start = clock(); Ddot_Multiply(a,b,c,N); stop = clock(); printf("Ddot_Multiply(). 
Elapsed time = %g seconds/n", ((double)(stop - start)) / CLOCKS_PER_SEC); //print simple test case of data to be sure multiplication is correct if (N < 7) { print_arr(N,"a", a); print_arr(N,"b", b); print_arr(N,"c", c); } free(a); free(b); free(c); //DGEMV Multiply //reallcoate to force cash to be flushed a=(double*) malloc( sizeof(double)*N*N ); b=(double*) malloc( sizeof(double)*N*N ); c=(double*) malloc( sizeof(double)*N*N ); init_arr(N,a); init_arr(N,b); start = clock(); Dgemv_multiply(a,b,c,N); stop = clock(); printf("Dgemv_multiply(). Elapsed time = %g seconds/n", ((double)(stop - start)) / CLOCKS_PER_SEC); //print simple test case of data to be sure multiplication is correct if (N < 7) { print_arr(N,"a", a); print_arr(N,"b", b); print_arr(N,"c", c); } free(a); free(b); free(c); //DGEMM Multiply //reallocate to force cash to be flushed a=(double*) malloc( sizeof(double)*N*N ); b=(double*) malloc( sizeof(double)*N*N ); c=(double*) malloc( sizeof(double)*N*N ); init_arr(N,a); init_arr(N,b); start = clock(); Dgemm_multiply(a,b,c,N); stop = clock(); printf("Dgemm_multiply(). 
Elapsed time = %g seconds/n", ((double)(stop - start)) / CLOCKS_PER_SEC); //print simple test case of data to be sure multiplication is correct if (N < 7) { print_arr(N,"a", a); print_arr(N,"b", b); print_arr(N,"c", c); } free(a); free(b); free(c); return 0; } //Brute force way of matrix multiply void Yours_multiply(double* a,double* b,double* c, int N) { int i, j, k; for (i=0;i<N*N;i++) c[i]=0; for (i = 0; i < N; i++) { for (j=0; j<N; j++) { for (k=0; k<N; k++) { c[N*i+j] += a[N*i+k] * b[N*k+j]; } } } } //The ddot way to matrix multiply void Ddot_Multiply(double* a,double* b,double* c, int N) { int i, j; int incx = 1; int incy = N; for (i = 0; i < N; i++) { for (j=0; j<N; j++) { c[N*i+j] = cblas_ddot(N,&a[N*i],incx,&b[j],incy); } } } //DGEMV way of matrix multiply void Dgemv_multiply(double* a,double* b,double* c, int N) { int i; double alpha = 1.0, beta = 0.; int incx = 1; int incy = N; for (i = 0; i < N; i++) { cblas_dgemv(CblasRowMajor,CblasNoTrans,N,N,alpha,a,N,&b[i],N,beta,&c[i],N); } } //DGEMM way. The PREFERED way, especially for large matrices void Dgemm_multiply(double* a,double* b,double* c, int N) { int i; double alpha = 1.0, beta = 0.; int incx = 1; int incy = N; cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans,N,N,N,alpha,b,N,a,N,beta,c,N); } //initialize array with random data void init_arr(int N, double* a) { int i,j; for (i=0; i< N;i++) { for (j=0; j<N;j++) { a[i*N+j] = (i+j+1)%10; //keep all entries less than 10. pleasing to the eye! } } } //print array to std out void print_arr(int N, char * name, double* array) { int i,j; printf("/n%s/n",name); for (i=0;i<N;i++) { for (j=0;j<N;j++) { printf("%g/t",array[N*i+j]); } printf("/n"); } }