首先要说明的是本测试系列,除了比较编译效率外,其余所有运行效率的比较都是将matlab的首次运行排出在外,因matlab程序首次运行效率较低。理论上,Forcal程序任意次运行效率都是一样的。不过话又说回来,任意程序包含的函数,有些是需要多次运行的,而有些仅运行一次,甚至一次都不运行,故matlab函数首次运行效率较低应该是一个缺点。但如果说,matlab函数首次运行会对函数进行优化,以后运行效率会显著提高,则matlab函数首次运行效率较低就成了一个优点。
=============
本次演练要用matlab和Forcal实现两个实用函数并进行测试。当然,对脚本来说,这些函数用C/C++或Fortran来实现应是脚本的最佳选择。
=============
1、求解实系数方程组的全选主元高斯消去法:包含大量数组元素存取操作
C/C++代码:
#include "stdafx.h"
#include <stdio.h>
#include <stdlib.h>
#include "time.h"
#include "math.h"
int agaus(double *a,double *b,int n)
{
int *js,l,k,i,j,is,p,q;
double d,t;
js=new int[n];
l=1;
for (k=0;k<=n-2;k++)
{
d=0.0;
for (i=k;i<=n-1;i++)
{
for (j=k;j<=n-1;j++)
{
t=fabs(a[i*n+j]);
if (t>d) { d=t; js[k]=j; is=i;}
}
}
if (d+1.0==1.0)
{
l=0;
}
else
{
if (js[k]!=k)
{
for (i=0;i<=n-1;i++)
{
p=i*n+k; q=i*n+js[k];
t=a[p]; a[p]=a[q]; a[q]=t;
}
}
if (is!=k)
{
for (j=k;j<=n-1;j++)
{
p=k*n+j; q=is*n+j;
t=a[p]; a[p]=a[q]; a[q]=t;
}
t=b[k]; b[k]=b[is]; b[is]=t;
}
}
if (l==0)
{
delete[] js; printf("fail\n");
return(0);
}
d=a[k*n+k];
for (j=k+1;j<=n-1;j++)
{
p=k*n+j; a[p]=a[p]/d;
}
b[k]=b[k]/d;
for (i=k+1;i<=n-1;i++)
{
for (j=k+1;j<=n-1;j++)
{
p=i*n+j;
a[p]=a[p]-a[i*n+k]*a[k*n+j];
}
b[i]=b[i]-a[i*n+k]*b[k];
}
}
d=a[(n-1)*n+n-1];
if (fabs(d)+1.0==1.0)
{
delete[] js; printf("fail\n");
return(0);
}
b[n-1]=b[n-1]/d;
for (i=n-2;i>=0;i--)
{
t=0.0;
for (j=i+1;j<=n-1;j++)
{
t=t+a[i*n+j]*b[j];
}
b[i]=b[i]-t;
}
js[n-1]=n-1;
for (k=n-1;k>=0;k--)
{
if (js[k]!=k)
{
t=b[k]; b[k]=b[js[k]]; b[js[k]]=t;
}
}
delete[] js;
return(1);
}
int main(int argc, char *argv[])
{
int i,j,k;
double a[4][4]=
{ {0.2368,0.2471,0.2568,1.2671},
{0.1968,0.2071,1.2168,0.2271},
{0.1581,1.1675,0.1768,0.1871},
{1.1161,0.1254,0.1397,0.1490} };
double b[4]={1.8471,1.7471,1.6471,1.5471};
double aa[4][4],bb[4];
clock_t tm;
tm=clock();
for(i=0;i<10000;i++)
{
for(j=0;j<4;j++)
{
for(k=0;k<4;k++)
{
aa[j][k]=a[j][k];
}
}
for(j=0;j<4;j++)
{
bb[j]=b[j];
}
agaus((double *)aa,bb,4);
}
printf("循环 %d 次, 耗时 %d 毫秒。\n", i,(clock()-tm));
for (i=0;i<=3;i++)
{
printf("x(%d)=%e\n",i,bb[i]);
}
}
结果:
循环 10000 次, 耗时 31 毫秒。
x(0)=1.040577e+000
x(1)=9.870508e-001
x(2)=9.350403e-001
x(3)=8.812823e-001
---------
matlab 2009a代码:
%file agaus.m
function c=agaus(a,b,n)
js=linspace(0,0,n);
l=1;
for k=1:n-1
d=0.0;
for i=k:n
for j=k:n
t=abs(a(i,j));
if (t>d)
d=t; js(k)=j; is=i;
end
end
end
if d+1.0==1.0
l=0;
else
if js(k)~=k
for i=1:n
t=a(i,k); a(i,k)=a(i,js(k)); a(i,js(k))=t;
end
end
if is~=k
for j=k:n
t=a(k,j); a(k,j)=a(is,j); a(is,j)=t;
end
t=b(k); b(k)=b(is); b(is)=t;
end
end
if l==0
printf('fail\n');
c=[];
return;
end
d=a(k,k);
for j=k+1:n
a(k,j)=a(k,j)/d;
end
b(k)=b(k)/d;
for i=k+1:n
for j=k+1:n
a(i,j)=a(i,j)-a(i,k)*a(k,j);
end
b(i)=b(i)-a(i,k)*b(k);
end
end
d=a(n,n);
if abs(d)+1.0==1.0
printf('fail\n');
c=[];
return;
end
b(n)=b(n)/d;
for i=n-1:-1:1
t=0.0;
for j=i+1:n
t=t+a(i,j)*b(j);
end
b(i)=b(i)-t;
end
js(n)=n;
for k=n:-1:1
if js(k)~=k
t=b(k); b(k)=b(js(k)); b(js(k))=t;
end
end
c=b;
return;
end
a=[0.2368,0.2471,0.2568,1.2671;
0.1968,0.2071,1.2168,0.2271;
0.1581,1.1675,0.1768,0.1871;
1.1161,0.1254,0.1397,0.1490] ;
b=[ 1.8471,1.7471,1.6471,1.5471];
tic
for i=1:10000
c=agaus(a,b,4);
end
c
toc
c =
1.0406 0.9871 0.9350 0.8813
Elapsed time is 0.762713 seconds.
----------
Forcal代码:
!using["math","sys"];
agaus(a,b,n : js,l,k,i,j,is, d,t)=
{
oo{ js=array(n)},
l=1, k=0,
while{ k<n-1,
d=0.0, i=k,
while{ i<n,
j=k, while{j<n,
t=abs(a[i,j]),
if{t>d, d=t, js[k]=j, is=i},
j++
},
i++
},
which{ d+1.0==1.0, l=0,
{ if{ (js[k]!=k),
i=0, while{i<n,
t=a[i,k], a[i,k]=a[i,js[k]], a[i,js[k]]=t,
i++
}
},
if{ (is!=k),
j=k, while{j<n,
t=a[k,j], a[k,j]=a[is,j], a[is,j]=t,
j++
},
t=b[k], b[k]=b[is], b[is]=t
}
}
},
if{ (l==0),
printff("fail\r\n"),
return(0)
},
d=a[k,k],
j=k+1, while {j<n, a[k,j]=a[k,j]/d, j++},
b[k]=b[k]/d,
i=k+1, while {i<n,
j=k+1, while{j<n,
a[i,j]=a[i,j]-a[i,k]*a[k,j],
j++
},
b[i]=b[i]-a[i,k]*b[k],
i++
},
k++
},
d=a[(n-1),n-1],
if{ abs(d)+1.0==1.0,
printff("fail\r\n"),
return(0)
},
b[n-1]=b[n-1]/d,
i=n-2, while{i>=0,
t=0.0,
j=i+1, while{j<n, t=t+a[i,j]*b[j], j++},
b[i]=b[i]-t,
i--
},
js[n-1]=n-1,
k=n-1, while{k>=0,
if{(js[k]!=k), t=b[k], b[k]=b[js[k]], b[js[k]]=t},
k--
},
return(1)
};
main(:i,a,b,aa,bb,t0)=
{
oo{a=arrayinit{2,4,4 :
0.2368,0.2471,0.2568,1.2671,
0.1968,0.2071,1.2168,0.2271,
0.1581,1.1675,0.1768,0.1871,
1.1161,0.1254,0.1397,0.1490},
b=arrayinit{1,4 : 1.8471,1.7471,1.6471,1.5471},
aa=array[4,4], bb=array[4]
},
t0=clock(),
i=0, while{i<10000, aa.=a, bb.=b, agaus(aa,bb,4), i++},
outm[bb],
[clock()-t0]/1000
};
结果:
1.04058 0.987051 0.93504 0.881282
2.125
Forcal用函数sys::A()对数组元素进行存取:
!using["math","sys"];
agaus(a,b,n : js,l,k,i,j,is, d,t)=
{
oo{ js=array(n)},
l=1, k=0,
while{ k<n-1,
d=0.0, i=k,
while{ i<n,
j=k, while{j<n,
t=abs(A[a,i,j]),
if{t>d, d=t, A[js,k]=j, is=i},
j++
},
i++
},
which{ d+1.0==1.0, l=0,
{ if{ (A[js,k]!=k),
i=0, while{i<n,
t=A[a,i,k], A[a,i,k]=A[a,i,A[js,k]], A[a,i,A[js,k]]=t,
i++
}
},
if{ (is!=k),
j=k, while{j<n,
t=A[a,k,j], A[a,k,j]=A[a,is,j], A[a,is,j]=t,
j++
},
t=A[b,k], A[b,k]=A[b,is], A[b,is]=t
}
}
},
if{ (l==0),
printff("fail\r\n"),
return(0)
},
d=A[a,k,k],
j=k+1, while {j<n, A[a,k,j]=A[a,k,j]/d, j++},
A[b,k]=A[b,k]/d,
i=k+1, while {i<n,
j=k+1, while{j<n,
A[a,i,j]=A[a,i,j]-A[a,i,k]*A[a,k,j],
j++
},
A[b,i]=A[b,i]-A[a,i,k]*A[b,k],
i++
},
k++
},
d=A[a,(n-1),n-1],
if{ abs(d)+1.0==1.0,
printff("fail\r\n"),
return(0)
},
A[b,n-1]=A[b,n-1]/d,
i=n-2, while{i>=0,
t=0.0,
j=i+1, while{j<n, t=t+A[a,i,j]*A[b,j], j++},
A[b,i]=A[b,i]-t,
i--
},
A[js,n-1]=n-1,
k=n-1, while{k>=0,
if{(A[js,k]!=k), t=A[b,k], A[b,k]=A[b,A[js,k]], A[b,A[js,k]]=t},
k--
},
return(1)
};
main(:i,a,b,aa,bb,t0)=
{
oo{a=arrayinit{2,4,4 :
0.2368,0.2471,0.2568,1.2671,
0.1968,0.2071,1.2168,0.2271,
0.1581,1.1675,0.1768,0.1871,
1.1161,0.1254,0.1397,0.1490},
b=arrayinit{1,4 : 1.8471,1.7471,1.6471,1.5471},
aa=array[4,4], bb=array[4]
},
t0=clock(),
i=0, while{i<10000, aa.=a, bb.=b, agaus(aa,bb,4), i++},
outm[bb],
[clock()-t0]/1000
};
结果:
1.04058 0.987051 0.93504 0.881282
1.454
----------
可以看出C/C++、matlab、Forcal耗时之比为 1 :25:68 (Forcal不使用函数sys::A())。
可以看出C/C++、matlab、Forcal耗时之比为 1 :25:47 (Forcal使用函数sys::A())。
本例Forcal耗时较长的原因在于本例程序含有大量的数组元素存取操作。
2、变步长辛卜生二重求积法:没有数组元素操作
C/C++代码:
#include "stdafx.h"
#include <stdio.h>
#include <stdlib.h>
#include "time.h"
#include "math.h"
double simp1(double x,double eps);
void fsim2s(double x,double y[]);
double fsim2f(double x,double y);
double fsim2(double a,double b,double eps)
{
int n,j;
double h,d,s1,s2,t1,x,t2,g,s,s0,ep;
n=1; h=0.5*(b-a);
d=fabs((b-a)*1.0e-06);
s1=simp1(a,eps); s2=simp1(b,eps);
t1=h*(s1+s2);
s0=1.0e+35; ep=1.0+eps;
while (((ep>=eps)&&(fabs(h)>d))||(n<16))
{
x=a-h; t2=0.5*t1;
for (j=1;j<=n;j++)
{
x=x+2.0*h;
g=simp1(x,eps);
t2=t2+h*g;
}
s=(4.0*t2-t1)/3.0;
ep=fabs(s-s0)/(1.0+fabs(s));
n=n+n; s0=s; t1=t2; h=h*0.5;
}
return(s);
}
double simp1(double x,double eps)
{
int n,i;
double y[2],h,d,t1,yy,t2,g,ep,g0;
n=1;
fsim2s(x,y);
h=0.5*(y[1]-y[0]);
d=fabs(h*2.0e-06);
t1=h*(fsim2f(x,y[0])+fsim2f(x,y[1]));
ep=1.0+eps; g0=1.0e+35;
while (((ep>=eps)&&(fabs(h)>d))||(n<16))
{
yy=y[0]-h;
t2=0.5*t1;
for (i=1;i<=n;i++)
{
yy=yy+2.0*h;
t2=t2+h*fsim2f(x,yy);
}
g=(4.0*t2-t1)/3.0;
ep=fabs(g-g0)/(1.0+fabs(g));
n=n+n; g0=g; t1=t2; h=0.5*h;
}
return(g);
}
void fsim2s(double x,double y[])
{
y[0]=-sqrt(1.0-x*x);
y[1]=-y[0];
}
double fsim2f(double x,double y)
{
return exp(x*x+y*y);
}
int main(int argc, char *argv[])
{
int i;
double a,b,eps,s;
clock_t tm;
a=0.0; b=1.0; eps=0.0001;
tm=clock();
for(i=0;i<100;i++)
{
s=fsim2(a,b,eps);
}
printf("s=%e , 耗时 %d 毫秒。\n", s, (clock()-tm));
}
结果:
s=2.698925e+000 , 耗时 78 毫秒。
-------
matlab代码:
%file fsim2.m
function s=fsim2(a,b,eps)
n=1; h=0.5*(b-a);
d=abs((b-a)*1.0e-06);
s1=simp1(a,eps); s2=simp1(b,eps);
t1=h*(s1+s2);
s0=1.0e+35; ep=1.0+eps;
while ((ep>=eps)&&(abs(h)>d))||(n<16),
x=a-h; t2=0.5*t1;
for j=1:n
x=x+2.0*h;
g=simp1(x,eps);
t2=t2+h*g;
end
s=(4.0*t2-t1)/3.0;
ep=abs(s-s0)/(1.0+abs(s));
n=n+n; s0=s; t1=t2; h=h*0.5;
end
end
function g=simp1(x,eps)
n=1;
[y0,y1]=f2s(x);
h=0.5*(y1-y0);
d=abs(h*2.0e-06);
t1=h*(f2f(x,y0)+f2f(x,y1));
ep=1.0+eps; g0=1.0e+35;
while (((ep>=eps)&&(abs(h)>d))||(n<16))
yy=y0-h;
t2=0.5*t1;
for i=1:n
yy=yy+2.0*h;
t2=t2+h*f2f(x,yy);
end
g=(4.0*t2-t1)/3.0;
ep=abs(g-g0)/(1.0+abs(g));
n=n+n; g0=g; t1=t2; h=0.5*h;
end
end
%file f2s.m
function [y0,y1]=f2s(x)
y0=-sqrt(1.0-x*x);
y1=-y0;
end
%file f2f.m
function c=f2f(x,y)
c=exp(x*x+y*y);
end
%%%%%%%%%%%%%
>> tic
for i=1:100
a=fsim2(0,1,0.0001);
end
a
toc
a =
2.6989
Elapsed time is 0.995575 seconds.
-------
Forcal代码:
fsim2s(x,y0,y1)=
{
y0=-sqrt(1.0-x*x),
y1=-y0
};
fsim2f(x,y)=exp(x*x+y*y);
//////////////////
simp1(x,eps : n,i,y0,y1,h,d,t1,yy,t2,g,ep,g0)=
{
n=1,
fsim2s(x,&y0,&y1),
h=0.5*(y1-y0),
d=abs(h*2.0e-06),
t1=h*(fsim2f(x,y0)+fsim2f(x,y1)),
ep=1.0+eps, g0=1.0e+35,
while {((ep>=eps)&(abs(h)>d))|(n<16),
yy=y0-h,
t2=0.5*t1,
i=1, while{i<=n,
yy=yy+2.0*h,
t2=t2+h*fsim2f(x,yy),
i++
},
g=(4.0*t2-t1)/3.0,
ep=abs(g-g0)/(1.0+abs(g)),
n=n+n, g0=g, t1=t2, h=0.5*h
},
g
};
fsim2(a,b,eps : n,j,h,d,s1,s2,t1,x,t2,g,s,s0,ep)=
{
n=1, h=0.5*(b-a),
d=abs((b-a)*1.0e-06),
s1=simp1(a,eps), s2=simp1(b,eps),
t1=h*(s1+s2),
s0=1.0e+35, ep=1.0+eps,
while {((ep>=eps)&(abs(h)>d))|(n<16),
x=a-h, t2=0.5*t1,
j=1, while{j<=n,
x=x+2.0*h,
g=simp1(x,eps),
t2=t2+h*g,
j++
},
s=(4.0*t2-t1)/3.0,
ep=abs(s-s0)/(1.0+abs(s)),
n=n+n, s0=s, t1=t2, h=h*0.5
},
s
};
//////////////////
mvar:
t0=sys::clock(),
i=0, while{i<100, a=fsim2(0,1,0.0001), i++}, a;
[sys::clock()-t0]/1000;
结果:
2.698925000624303
0.328
---------
本例C/C++、matlab、Forcal运行耗时之比为 1:12.7:4.2 。
本例matlab慢的原因应该在于有大量的函数调用。另外,matlab要求每一个函数必须存为磁盘文件真的很麻烦。
本例还说明,对C/C++和Forcal脚本来说,C/C++的一条指令,Forcal平均大约用4.2条指令进行解释。
3、变步长辛卜生二重求积法,写成通用的函数:没有数组元素操作
注意函数fsim2中增加了两个参数,函数句柄fsim2s用于计算二重积分时内层的上下限,函数句柄fsim2f用于计算积分函数的值。
不再给出C/C++代码,因其效率不会发生变化。
Matlab代码:
%file fsim2.m
function s=fsim2(a,b,eps,fsim2s,fsim2f)
n=1; h=0.5*(b-a);
d=abs((b-a)*1.0e-06);
s1=simp1(a,eps,fsim2s,fsim2f); s2=simp1(b,eps,fsim2s,fsim2f);
t1=h*(s1+s2);
s0=1.0e+35; ep=1.0+eps;
while ((ep>=eps)&&(abs(h)>d))||(n<16),
x=a-h; t2=0.5*t1;
for j=1:n
x=x+2.0*h;
g=simp1(x,eps,fsim2s,fsim2f);
t2=t2+h*g;
end
s=(4.0*t2-t1)/3.0;
ep=abs(s-s0)/(1.0+abs(s));
n=n+n; s0=s; t1=t2; h=h*0.5;
end
end
function g=simp1(x,eps,fsim2s,fsim2f)
n=1;
[y0,y1]=fsim2s(x);
h=0.5*(y1-y0);
d=abs(h*2.0e-06);
t1=h*(fsim2f(x,y0)+fsim2f(x,y1));
ep=1.0+eps; g0=1.0e+35;
while (((ep>=eps)&&(abs(h)>d))||(n<16))
yy=y0-h;
t2=0.5*t1;
for i=1:n
yy=yy+2.0*h;
t2=t2+h*fsim2f(x,yy);
end
g=(4.0*t2-t1)/3.0;
ep=abs(g-g0)/(1.0+abs(g));
n=n+n; g0=g; t1=t2; h=0.5*h;
end
end
%file f2s.m
function [y0,y1]=f2s(x)
y0=-sqrt(1.0-x*x);
y1=-y0;
end
%file f2f.m
function c=f2f(x,y)
c=exp(x*x+y*y);
end
%%%%%%%%%%%%%%%%
>> tic
for i=1:100
a=fsim2(0,1,0.0001,@f2s,@f2f);
end
a
toc
a =
2.6989
Elapsed time is 1.267014 seconds.
--------
Forcal代码:
simp1(x,eps,fsim2s,fsim2f : n,i,y0,y1,h,d,t1,yy,t2,g,ep,g0)=
{
n=1,
fsim2s(x,&y0,&y1),
h=0.5*(y1-y0),
d=abs(h*2.0e-06),
t1=h*(fsim2f(x,y0)+fsim2f(x,y1)),
ep=1.0+eps, g0=1.0e+35,
while {((ep>=eps)&(abs(h)>d))|(n<16),
yy=y0-h,
t2=0.5*t1,
i=1, while{i<=n,
yy=yy+2.0*h,
t2=t2+h*fsim2f(x,yy),
i++
},
g=(4.0*t2-t1)/3.0,
ep=abs(g-g0)/(1.0+abs(g)),
n=n+n, g0=g, t1=t2, h=0.5*h
},
g
};
fsim2(a,b,eps,fsim2s,fsim2f : n,j,h,d,s1,s2,t1,x,t2,g,s,s0,ep)=
{
n=1, h=0.5*(b-a),
d=abs((b-a)*1.0e-06),
s1=simp1(a,eps,fsim2s,fsim2f), s2=simp1(b,eps,fsim2s,fsim2f),
t1=h*(s1+s2),
s0=1.0e+35, ep=1.0+eps,
while {((ep>=eps)&(abs(h)>d))|(n<16),
x=a-h, t2=0.5*t1,
j=1, while{j<=n,
x=x+2.0*h,
g=simp1(x,eps,fsim2s,fsim2f),
t2=t2+h*g,
j++
},
s=(4.0*t2-t1)/3.0,
ep=abs(s-s0)/(1.0+abs(s)),
n=n+n, s0=s, t1=t2, h=h*0.5
},
s
};
//////////////////
f2s(x,y0,y1)=
{
y0=-sqrt(1.0-x*x),
y1=-y0
};
f2f(x,y)=exp(x*x+y*y);
mvar:
t0=sys::clock(),
i=0, while{i<100, a=fsim2(0,1,0.0001,HFor("f2s"),HFor("f2f")), i++}, a;
[sys::clock()-t0]/1000;
结果:
2.698925000624303
0.844
--------
本例matlab与Forcal耗时之比为1.267014 :0.844。Forcal仍有优势。
本例Forcal耗时增加的原因:在函数fsim2及simp1中要动态查找函数句柄fsim2s,fsim2f,并验证其是否有效,故效率下降了。