Lately I have been reimplementing the official Spark examples in the hope of improving; without accumulating small steps, you cannot travel a thousand miles.
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
1.BroadcastTest: tests Broadcast, Spark's read-only shared (broadcast) variable
package llf

import org.apache.spark.{SparkConf, SparkContext}

/**
 * Created by sendoh on 2015/4/20.
 */
object BroadcastTest {
  def main(args: Array[String]): Unit = {
    val bcName = if (args.length > 2) args(2) else "Http" // broadcast implementation name (Http or Torrent)
    val blockSize = if (args.length > 3) args(3) else "4096" // broadcast block size
    val conf = new SparkConf().setAppName("BroadcastTest").setMaster("local[2]").setSparkHome("/usr/local/spark-1.2.0-bin-hadoop2.4")
      .set("spark.broadcast.factory", s"org.apache.spark.broadcast.${bcName}BroadcastFactory")
      .set("spark.broadcast.blockSize", blockSize)
    val sc = new SparkContext(conf)
    val slices = if (args.length > 0) args(0).toInt else 2 // number of partitions
    val num = if (args.length > 1) args(1).toInt else 1000000 // size of the array to broadcast
    val arr1 = (0 until num).toArray // arr1 holds the integers 0 until num
    for (i <- 0 until 3) { // run three iterations: 0, 1, 2
      println("Iteration " + i)
      println("============")
      val startTime = System.nanoTime // record the start time
      val barr1 = sc.broadcast(arr1) // broadcast arr1 to all executors
      // each of the 10 elements reads the broadcast value and reports its size
      val observedSizes = sc.parallelize(1 to 10, slices).map(_ => barr1.value.size)
      observedSizes.collect().foreach(i => println(i))
      println("Iteration %d took %.0f milliseconds".format(i, (System.nanoTime - startTime) / 1E6))
    }
    sc.stop()
  }
}
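Broadcast variables are most useful when every task needs the same read-only lookup data. Below is a minimal sketch of that pattern; the object name, the lookup map, and the sample data are my own and are not part of the official example.

import org.apache.spark.{SparkConf, SparkContext}

object BroadcastLookupSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("BroadcastLookupSketch").setMaster("local[2]"))
    // a small read-only lookup table, shared with every task through a broadcast variable
    val countryNames = Map("CN" -> "China", "US" -> "United States")
    val bCountries = sc.broadcast(countryNames)
    val codes = sc.parallelize(Seq("CN", "US", "CN", "JP"))
    // tasks read bCountries.value instead of having the map serialized into each closure
    val names = codes.map(code => bCountries.value.getOrElse(code, "unknown"))
    names.collect().foreach(println)
    sc.stop()
  }
}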
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
2.DriverSubmissionTest: prints environment variables and system properties, then keeps the driver alive for the given number of seconds
package llf

import org.apache.spark.util.Utils

import scala.collection.JavaConversions._

/**
 * Created by sendoh on 2015/4/20.
 */
object DriverSubmissionTest {
  def main(args: Array[String]): Unit = { // args carries the command-line arguments
    if (args.length < 1) {
      println("Usage: DriverSubmissionTest <seconds-to-sleep>")
      System.exit(0)
    }
    val numSecondsToSleep = args(0).toInt
    val env = System.getenv() // environment variables of the driver process
    val properties = Utils.getSystemProperties() // JVM system properties
    println("Environment variables containing SPARK_TEST:")
    env.filter { case (k, v) => k.contains("SPARK_TEST") }.foreach(println)
    println("System properties containing spark.test:")
    properties.filter { case (k, v) => k.toString.contains("spark.test") }.foreach(println)
    for (i <- 1 until numSecondsToSleep) {
      println(s"Alive for $i out of $numSecondsToSleep seconds")
      Thread.sleep(1000)
    }
  }
}
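Note that org.apache.spark.util.Utils is an internal Spark class and may not be accessible from your own project. If it is not, the same information can be printed with the standard Scala library; a minimal sketch (the object name is mine):

object DriverEnvSketch {
  def main(args: Array[String]): Unit = {
    // sys.env and sys.props are plain Scala views of the process environment and JVM system properties
    println("Environment variables containing SPARK_TEST:")
    sys.env.filter { case (k, _) => k.contains("SPARK_TEST") }.foreach(println)
    println("System properties containing spark.test:")
    sys.props.filter { case (k, _) => k.contains("spark.test") }.foreach(println)
  }
}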
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
3.ExceptionHandlingTest: throws an exception inside a task to test failure handling
package llf

import org.apache.spark.{SparkConf, SparkContext}

/**
 * Created by sendoh on 2015/4/22.
 */
object ExceptionHandlingTest { // exception handling test
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("ExceptionHandlingTest").setMaster("local[2]")
    val sc = new SparkContext(conf)
    // parallelize 0 until the default parallelism; each task throws with probability 0.25
    sc.parallelize(0 until sc.defaultParallelism).foreach { i =>
      if (math.random > 0.75) { // math.random produces a value in [0, 1)
        throw new Exception("Testing Exception Handling") // the failure propagates back to the driver
      }
    }
    sc.stop()
  }
}
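The example simply lets the job fail. If the driver should survive a failed job instead, the action can be wrapped in try/catch on the driver side. A minimal sketch under that assumption (the object name is mine; I catch Exception generically rather than a specific Spark exception type):

import org.apache.spark.{SparkConf, SparkContext}

object ExceptionCatchSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("ExceptionCatchSketch").setMaster("local[2]"))
    try {
      // the action fails once a task throws; the error surfaces here on the driver
      sc.parallelize(0 until sc.defaultParallelism).foreach { _ =>
        if (math.random > 0.75) throw new Exception("Testing Exception Handling")
      }
      println("job finished without a task throwing")
    } catch {
      case e: Exception => println("job failed: " + e.getMessage)
    } finally {
      sc.stop()
    }
  }
}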
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
4.GroupByTest: tests grouping key-value pairs by key
package llf

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.SparkContext._

import scala.util.Random

/**
 * Created by sendoh on 2015/4/22.
 */
object GroupByTest {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("GroupByTest").setMaster("local[2]")
    val sc = new SparkContext(conf)
    val numMappers = if (args.length > 0) args(0).toInt else 2 // number of map-side partitions (first command-line argument)
    val numKVPairs = if (args.length > 1) args(1).toInt else 1000 // key-value pairs generated per partition (second argument)
    val valSize = if (args.length > 2) args(2).toInt else 1000 // size in bytes of each value (third argument)
    val numReducers = if (args.length > 3) args(3).toInt else numMappers // number of reduce-side partitions (fourth argument)
    val pairs1 = sc.parallelize(0 until numMappers, numMappers).flatMap { p =>
      val ranGen = new Random // per-partition random number generator
      val arr1 = new Array[(Int, Array[Byte])](numKVPairs)
      for (i <- 0 until numKVPairs) {
        val byteArr = new Array[Byte](valSize) // each value is a random byte array of valSize bytes
        ranGen.nextBytes(byteArr)
        arr1(i) = (ranGen.nextInt(Int.MaxValue), byteArr) // random key paired with the byte array
      }
      arr1
    }.cache()
    pairs1.count() // materialize and cache the pairs before the shuffle
    println(pairs1.groupByKey(numReducers).count()) // number of distinct keys after grouping
    sc.stop()
  }
}
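groupByKey shuffles every value across the network. When the goal is an aggregation such as a count or a sum, reduceByKey is usually preferred because it combines values per key on the map side before the shuffle. A minimal sketch of that alternative, using my own object name and sample data rather than the example's random pairs:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.SparkContext._

object ReduceByKeySketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("ReduceByKeySketch").setMaster("local[2]"))
    val pairs = sc.parallelize(Seq(("a", 1), ("b", 1), ("a", 1), ("c", 1), ("a", 1)))
    // values are pre-aggregated per key within each partition, so far less data is shuffled
    val counts = pairs.reduceByKey(_ + _)
    counts.collect().foreach(println) // e.g. (a,3), (b,1), (c,1)
    sc.stop()
  }
}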
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
As a Spark beginner I still have a great deal to learn; discussion of related questions is welcome, QQ: 1217111493