1. Build a method that simulates an algorithm run (do not use thread techniques such as sleeping to simulate the delay; the nested loops below simply burn CPU time).
public class TimerSimulate {

    // Burn CPU with nested loops to simulate a long-running algorithm,
    // instead of putting a thread to sleep.
    public String test(String a) {
        String ret = "";
        for (int i = 0; i < 1000; i++) {
            for (int j = 0; j < 10000; j++) {
                for (int x = 0; x < 10; x++) {
                    long k = (long) i * j * x; // busy work only
                    k = 888;                   // the value itself is irrelevant
                    ret = a + " " + k;
                }
            }
        }
        return ret;
    }

    public static void main(String[] args) {
        TimerSimulate t = new TimerSimulate();
        long begin = System.currentTimeMillis();
        // Run the simulated algorithm ten times in a row as a serial baseline.
        for (int i = 0; i < 10; i++) {
            String str = t.test("a");
            System.out.println(str);
        }
        long end = System.currentTimeMillis();
        System.out.println("Total time in seconds: " + (end - begin) / 1000);
    }
}
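Running this main class on a single machine gives the serial baseline: ten back-to-back calls to test, with the total wall-clock time printed at the end. The Spark program in step 2 distributes the same calls across the cluster, so the two totals can be compared directly.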
2. Build the Scala program. Pay attention to the number of cluster nodes, the cores per node, and the number of splits (partitions).
import java.util.Random

import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

object NTest {

  // One TimerSimulate per JVM; each map call runs the simulated algorithm once.
  val t: TimerSimulate = new TimerSimulate()

  def nf(str: String): String = {
    val ret = t.test(str)
    str + "--" + ret
  }

  def main(args: Array[String]): Unit = {
    val r: Random = new Random()
    val sc = new SparkContext("spark://ip:7077", "ntest",
      System.getenv("SPARK_HOME"), SparkContext.jarOfClass(this.getClass))

    // args(0): number of work items; args(1): number of splits (partitions).
    val arr = new Array[String](args(0).toInt)
    for (i <- 0 until arr.length) {
      arr(i) = r.nextInt(1000).toString
    }

    // Distribute the items across args(1) splits and run the simulation on each.
    val dataset1: RDD[String] = sc.parallelize(arr, args(1).toInt).map(nf)

    // coalesce(1, shuffle = true) pulls everything into a single partition so
    // the result is written to HDFS as one output file.
    dataset1.coalesce(1, shuffle = true).saveAsTextFile("hdfs://ip:9000/test/ntest")
  }
}
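Before pointing the job at a real cluster, the same pipeline can be checked in local mode. The sketch below is an assumed setup, not part of the original program (the object name NTestLocal, the local[4] master, and the fixed item/split counts are illustrative): with 4 local cores and 8 splits, each core processes two partitions of the simulated workload.

import org.apache.spark.SparkContext

// A minimal local-mode sketch; names and counts here are assumptions for illustration.
object NTestLocal {
  def main(args: Array[String]): Unit = {
    // "local[4]" runs Spark in-process with 4 worker threads, no cluster needed.
    val sc = new SparkContext("local[4]", "ntest-local")
    // 8 work items split into 8 partitions: two splits per core.
    val inputs = Array.tabulate(8)(i => i.toString)
    val result = sc.parallelize(inputs, 8)
      .map(s => s + "--" + new TimerSimulate().test(s)) // constructed on the worker side
    result.collect().foreach(println)
    sc.stop()
  }
}

In cluster mode the same rule of thumb applies: args(1), the number of splits passed to parallelize, is usually set to the total number of cores across the workers (or a small multiple of it) so that every core stays busy.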