矩阵数组在 MATLAB 中生成，代码如下：
% Generate a block-structured test matrix and write it to MatrixA.txt.
%
% A is an n-by-m matrix of uniform random values. Every element A(j,i) is
% expanded into an m-by-m block A(j,i)*eye(m), so C is the Kronecker
% product kron(A, eye(m)) with size (n*m)-by-(m*m). Each row of C is
% written as one text line, every value formatted with '%6.2f'.
clear;
clc;
m=16;
n=16;
A=rand(n,m);
B=eye(m);        % m-by-m identity: the block that each element of A scales
C=kron(A,B);     % block (j,i) of C equals A(j,i)*B
[p,q]=size(C);
% Check the open result: fopen returns -1 (with a message) on failure, and
% fprintf on an invalid identifier only raises a far less helpful error.
[fid,msg]=fopen('..\CUDA FORTRAN test\data\MatrixA.txt','w+');
if fid==-1
    error('MatrixA:openFailed','Cannot open MatrixA.txt for writing: %s',msg);
end
for i=1:p
    % The format is recycled over the row, so all q values land on one line.
    fprintf(fid, '%6.2f',C(i,:));
    fprintf(fid, '\n');
end
% Close the file so buffered output is flushed and the handle is released.
fclose(fid);
GPU 代码使用 PGI CUDA Fortran 编程手册提供的矩阵乘法示例，未做优化：
program mmul
use mmul_mod
implicit none
integer :: ioa=10, iob=11, ioc=12
real :: A(N,M), B(M,L), C(N,L)
integer :: N=256
integer ::M=256