在《GPU编程CUDA实战》第45页有个Julia数据集,本文把这个CUDA的程序改成了java的,然后每个程序运行20次,统计时间,比较这两个程序那个快些。
java(ms) | c++(ms) |
202 | 1904 |
269 | 1850 |
228 | 1737 |
242 | 1712 |
181 | 1770 |
222 | 1774 |
272 | 1733 |
295 | 1681 |
238 | 1320 |
282 | 1725 |
248 | 1833 |
294 | 1478 |
264 | 1692 |
176 | 1431 |
229 | 1479 |
230 | 1457 |
256 | 1645 |
193 | 1802 |
239 | 1208 |
230 | 1537 |
239.5 | 1638.4 |
结果是惊人的,java完成同样的计算任务用的时间只有CUDA的15%,就这道题来说Java的速度是CUDA的5.8倍.
CUDA程序
#include "book.h"
#include "cuda_runtime.h"
#include "cpu_bitmap.h"
#include<iostream>
#include <time.h>
using namespace std;
#define DIM 1000
/****************************************/
struct cuComplex {
float r;
float i;
__device__ cuComplex(float a,float b) : r(a),i(b) {}
__device__ float magnitude2(void){
return r*r+i*i;
}
__device__ cuComplex operator*(const cuComplex& a) {
return cuComplex(r*a.r-i*a.i, i*a.r+r*a.i);
}
__device__ cuComplex operator+(const cuComplex& a){
return cuComplex(r+a.r,i+a.i);
}
};
/***************************************/
__device__ int julia(int x ,int y){
const float scale =2;
float jx=scale*(float)(DIM/2-x)/(DIM/2);
float jy=scale*(float)(DIM/2-y)/(DIM/2);
cuComplex c(-0.8,0.156);
cuComplex a(jx,jy);
int i=0;
for(i=0 ;i<200; i++){
a=a*a+c;
if(a.magnitude2()>1000)
return 0;
}
return 1;
}
__global__ void kernel(unsigned char *ptr){
int x=blockIdx.x;
int y=blockIdx.y;
int offset=x+y*gridDim.x;
int juliaValue=julia(x,y);
ptr[offset*4+0]=255*juliaValue;
ptr[offset*4+1]=0;
ptr[offset*4+2]=0;
ptr[offset*4+3]=255;
}
int main(void){
clock_t start,ends;
start=clock();
CPUBitmap bitmap(DIM,DIM);
unsigned char *dev_bitmap;
HANDLE_ERROR (cudaMalloc( (void**)&dev_bitmap,bitmap.image_size()));
dim3 grid(DIM,DIM);
kernel<<< grid,1>>>(dev_bitmap);
HANDLE_ERROR (cudaMemcpy (bitmap.get_ptr(),dev_bitmap,bitmap.image_size(),cudaMemcpyDeviceToHost ));
ends=clock();
cout<<(ends-start)*1000/CLOCKS_PER_SEC <<endl;
bitmap.display_and_exit();
HANDLE_ERROR (cudaFree(dev_bitmap));
}
Java程序
import java.awt.Color;
import java.awt.Font;
import java.awt.Graphics;
import java.io.FileWriter;
import java.io.IOException;
import java.text.DecimalFormat;
import javax.swing.JFrame;
import javax.swing.JPanel;
public class julia1 extends JFrame {
MyPanedrawjulia1 mp = null ; //1
public static void main(String[] args)
{
long sysDate1 = System.currentTimeMillis();
julia1 qwe = new julia1(); //2
long sysDate2 = System.currentTimeMillis();
System.out.println(sysDate2-sysDate1 );
}
public julia1() //3
{
mp = new MyPanedrawjulia1(); //4
this.add(mp);
this.setSize(3000,2000);
this.setVisible(true);
this.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
}
}
class MyPanedrawjulia1 extends JPanel //用于绘图和实现绘图区域 //5
{
public void paint(Graphics g)
{
super.paint(g);
try {
julia( g );
} catch (IOException e1) {
e1.printStackTrace();
}
}
private void julia(Graphics g) throws IOException
{
g.setFont(new Font("宋体",Font.BOLD,20));
int dim=1000;
double c=-0.8;
double d=0.156;
double rate=1.6;
DecimalFormat df = new DecimalFormat( "0.0000000000");
for(double a=1 ;a<1000;a++)
{
for(double b=1 ;b<1000;b++)
{
double p1=rate*(dim/2-a)/(dim/2);
double p2=rate*(dim/2-b)/(dim/2);
double sum=0;
for( int n=0 ;n<200;n++ )
{
double r1=0.0;
double r2=0.0;
r1=Math.pow(p1, 2)-Math.pow(p2, 2)+c;
r2=2*p1*p2+d;
p1=r1;
p2=r2;
sum=sum+(Math.pow(p1, 2)+Math.pow(p2, 2));
if (sum>1000)
{
break;
}
}
if(sum<1000)
{
//System.out.println(a+" ***** "+b +" "+sum+" " );
g.setColor(Color.red);
g.drawOval((int)(a), (int)(b),2, 2);
}
/*************************************************/
}
}
}
}
CUDA生成图片
Java生成图片