累加器 (Accumulator demo)
import org.apache.spark.Accumulator; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.broadcast.Broadcast; import scala.Tuple2; import java.util.ArrayList; import java.util.List; /** * Created by hadoop on 17-11-2. * 迭代器,其实就是用来在excutor上计算后能够叠加的值,在节点上不能读,只能写 */ public class AccumulatorDemo { public static void main(String[]args){ SparkConf conf = new SparkConf() .setAppName(" Accumulator") .setMaster("local[4]") .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.set("spark.defaultparallelism","4"); JavaSparkContext sc = new JavaSparkContext(conf); System.out.println("star========="); Person []persons=new Person[1000]; //广播对象 Broadcast<Person[]> person_br=sc.broadcast(persons); //累加器 Accumulator<Integer> count=sc.accumulator(0); // List<String> data1=new ArrayList<String>(); data1.add("banala"); data1.add("orage"); data1.add("chiken"); data1.add("beef"); data1.add(""); data1.add("egg"); data1.add(""); JavaRDD<String> rdd1=sc.parallelize(data1,2); rdd1.mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) throws Exception { long id=Thread.currentThread().getId(); System.out.println("s:"+s+"in thread:"+id); if(s.equals("")){ Person p=new Person(); int x=p.getNumber(); x++; p.setNumber(x); //count.add(1); } return new Tuple2<String,Integer>(s,1); } }).collect(); //System.out.println(count.value()); Person p1=new Person(); System.out.println(p1.getNumber()); sc.stop(); } static class Person{ static int number=0; public int getNumber() { return number; } public void setNumber(int number) { this.number = number; } } }
广播 (Broadcast demo)
import org.apache.spark.Accumulator; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.Function2; import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.broadcast.Broadcast; import org.apache.spark.streaming.Durations; import org.apache.spark.streaming.api.java.JavaPairDStream; import org.apache.spark.streaming.api.java.JavaReceiverInputDStream; import org.apache.spark.streaming.api.java.JavaStreamingContext; import scala.Tuple2; import java.util.Arrays; import java.util.List; /** * Created by hadoop on 17-11-3. * 广播和计数需要一个action */ public class BroadCastDemo { //创建一个list的广播变量 private static volatile Broadcast<List<String>> broadcasLIst=null; //创建一个计数器 private static volatile Accumulator<Integer> accumulator=null; public static void main(String[]args) throws InterruptedException { SparkConf conf = new SparkConf() .setAppName("Broadcast") .setMaster("local[4]") .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); System.out.println("star========="); //5s钟处理一次 JavaStreamingContext jsc=new JavaStreamingContext(conf, Durations.seconds(5)); broadcasLIst=jsc.sparkContext().broadcast(Arrays.asList("hadoop","mahout","hive")); accumulator=jsc.sparkContext().accumulator(0,"OnlineBlackListCounter"); JavaReceiverInputDStream<String>lines=jsc.socketTextStream("localhost",9999); JavaPairDStream<String,Integer> pairs=lines.mapToPair(new PairFunction<String, String, Integer>() { @Override public Tuple2<String, Integer> call(String s) throws Exception { return new Tuple2<String, Integer>(s,1); } }); JavaPairDStream<String,Integer> wordcount=pairs.reduceByKey(new Function2<Integer, Integer, Integer>() { @Override public Integer call(Integer integer, Integer integer2) throws Exception { return integer+integer2; } }); // wordcount.foreach(new Function2<JavaPairRDD<String,Integer>,Time,Void>(){ // @Override // public Void call(JavaPairRDD<String,Integer>rdd,Time time)throws Exception { // rdd.filter(new 
Function<Tuple2<String, Integer>, Boolean>() { // @Override // public Boolean call(Tuple2<String, Integer> stringIntegerTuple2) throws Exception { // if (broadcasLIst.value().contains(stringIntegerTuple2._1)) { // accumulator.add(stringIntegerTuple2._2); // return false; // } else { // return true; // } // // } // }).collect(); // System.out.println("广播变量里的值"+broadcasLIst.value()); // System.out.println("累加器里的值"+accumulator.value()); // return null; // } // }); jsc.start(); jsc.awaitTermination(); jsc.close(); } }