package cn.spark.study.core;
import java.util.List;
import java.util.Arrays;
import java.util.Map;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;
public class ActionOperation {

    public static void main(String[] args) {
        // reduce();
        // collect();
        // count();
        // take();
        // saveAsTextFile();
        countByKey();
    }
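
    /**
     * reduce: aggregates all elements of the RDD with the given function
     * and returns the final result to the driver.
     */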
    private static void reduce() {
        SparkConf conf = new SparkConf().setAppName("reduce").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<Integer> numberList = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        JavaRDD<Integer> numbers = sc.parallelize(numberList);

        // Sum all elements: 1 + 2 + ... + 10 = 55
        int sum = numbers.reduce(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer v1, Integer v2) throws Exception {
                return v1 + v2;
            }
        });

        System.out.println(sum);
        sc.close();
    }
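
    /**
     * collect: pulls every element of the RDD back to the driver as a List.
     * Only suitable for small RDDs, since all data must fit in driver memory.
     */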
    private static void collect() {
        SparkConf conf = new SparkConf().setAppName("collect").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<Integer> numberList = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        JavaRDD<Integer> numbers = sc.parallelize(numberList);

        // Double every element
        JavaRDD<Integer> multiplyNumbers = numbers.map(new Function<Integer, Integer>() {
            @Override
            public Integer call(Integer v) throws Exception {
                return v * 2;
            }
        });

        List<Integer> multiplyList = multiplyNumbers.collect();
        for (Integer num : multiplyList) {
            System.out.println(num);
        }
        sc.close();
    }
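
    /**
     * count: returns the number of elements in the RDD.
     */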
    private static void count() {
        SparkConf conf = new SparkConf().setAppName("count").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<Integer> numberList = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        JavaRDD<Integer> numbers = sc.parallelize(numberList);

        long count = numbers.count();
        System.out.println(count);
        sc.close();
    }
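
    /**
     * take(n): returns the first n elements of the RDD to the driver.
     */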
    private static void take() {
        SparkConf conf = new SparkConf().setAppName("take").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<Integer> numberList = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        JavaRDD<Integer> numbers = sc.parallelize(numberList);

        // Take the first 3 elements: [1, 2, 3]
        List<Integer> firstThree = numbers.take(3);
        System.out.println(firstThree);
        sc.close();
    }
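
    /**
     * saveAsTextFile: writes each element of the RDD as a line of text to the
     * given path (created as a directory of part files), on the local
     * filesystem or HDFS.
     */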
    private static void saveAsTextFile() {
        SparkConf conf = new SparkConf().setAppName("saveAsTextFile").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<Integer> numberList = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        JavaRDD<Integer> numbers = sc.parallelize(numberList);

        JavaRDD<Integer> multiplyNumbers = numbers.map(new Function<Integer, Integer>() {
            @Override
            public Integer call(Integer v) throws Exception {
                return v * 2;
            }
        });

        // Note: "./count.txt" is created as a directory of part files, not a single file
        multiplyNumbers.saveAsTextFile("./count.txt");
        sc.close();
    }
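
    /**
     * countByKey: counts the elements for each key of a pair RDD and returns
     * the result to the driver as a Map. Use it only when the number of
     * distinct keys is small enough to fit in driver memory.
     */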
    private static void countByKey() {
        SparkConf conf = new SparkConf().setAppName("countByKey").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        List<Tuple2<String, String>> studentList = Arrays.asList(
                new Tuple2<String, String>("class1", "elo"),
                new Tuple2<String, String>("class2", "jave"),
                new Tuple2<String, String>("class1", "tom"),
                new Tuple2<String, String>("class2", "smi"),
                new Tuple2<String, String>("class2", "smssi")
        );

        JavaPairRDD<String, String> students = sc.parallelizePairs(studentList);

        // Count how many students each class key has
        Map<String, Long> studentCounts = students.countByKey();
        for (Map.Entry<String, Long> studentCount : studentCounts.entrySet()) {
            System.out.println(studentCount.getKey() + ": " + studentCount.getValue());
        }
        sc.close();
    }
}
Spark 2.2.0 action operations, Java version