package com.bjsxt.scala;
import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;
/**
 * Word-count example built on the Spark Java API.
 *
 * <p>Reads a text file, splits every line on single spaces, counts how often each token occurs
 * and prints the resulting {@code (count, word)} pairs sorted by count in descending order.
 */
public class WordCount {

    /** Input path used when no command-line argument is supplied. */
    private static final String DEFAULT_INPUT_PATH = "./words";

    /**
     * Runs the word-count job.
     *
     * @param args optional; {@code args[0]} is the path of the text file to read. When absent,
     *             {@code ./words} is used.
     */
    public static void main(String[] args) {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;

        // Job configuration: application name "test", master "local".
        SparkConf conf = new SparkConf().setAppName("test").setMaster("local");

        // The context is the entry point for creating RDDs.
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            // One RDD element per line of the input file.
            JavaRDD<String> lines = sc.textFile(inputPath);

            // flatMap is one-to-many: every line is split into its tokens.
            // Note: splitting on a single space means consecutive spaces and empty lines
            // yield empty-string tokens, which are counted like any other word.
            JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
                @Override
                public Iterable<String> call(String line) throws Exception {
                    return Arrays.asList(line.split(" "));
                }
            });

            // mapToPair is one-to-one but produces key/value tuples; turning each word into
            // (word, 1) yields a JavaPairRDD, which is what offers key-based operations
            // such as reduceByKey.
            JavaPairRDD<String, Integer> wordOnes =
                    words.mapToPair(new PairFunction<String, String, Integer>() {
                        @Override
                        public Tuple2<String, Integer> call(String word) throws Exception {
                            return new Tuple2<String, Integer>(word, 1);
                        }
                    });

            // reduceByKey groups tuples by key and folds their values; summing the 1s gives
            // the number of occurrences of each word.
            JavaPairRDD<String, Integer> wordCounts =
                    wordOnes.reduceByKey(new Function2<Integer, Integer, Integer>() {
                        @Override
                        public Integer call(Integer v1, Integer v2) throws Exception {
                            return v1 + v2;
                        }
                    });

            // Swap key and value to (count, word) so the count becomes the sort key.
            JavaPairRDD<Integer, String> countWords = wordCounts.mapToPair(
                    new PairFunction<Tuple2<String, Integer>, Integer, String>() {
                        @Override
                        public Tuple2<Integer, String> call(Tuple2<String, Integer> tuple)
                                throws Exception {
                            return new Tuple2<Integer, String>(tuple._2, tuple._1);
                        }
                    });

            // ascending = false: highest counts first.
            JavaPairRDD<Integer, String> result = countWords.sortByKey(false);

            // Print every (count, word) tuple.
            result.foreach(new VoidFunction<Tuple2<Integer, String>>() {
                @Override
                public void call(Tuple2<Integer, String> countAndWord) throws Exception {
                    System.out.println(countAndWord);
                }
            });
        } finally {
            // Always shut the context down, even when the job fails part-way through.
            sc.stop();
        }
    }
}