package mxr.spark.cases;

import java.io.Serializable;
import java.util.List;
import java.util.Objects;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.junit.Test;

import scala.Tuple3;

import mxr.spark.util.SparkUtil;
/**
* @author RenYuXin
* @date 2025/4/25 8:12
* @description
*/
/**
 * Exercises over the score data sets ({@code result_bigdata.txt} /
 * {@code result_math.txt}); each line is an {@code id<TAB>name<TAB>score} record.
 * The same "top 5" query is written in several styles (explicit anonymous
 * {@link Function} classes, named lambdas, method references) for comparison.
 */
public class ScoreTest implements Serializable
{
    /** Shared Spark context for every test in this class. */
    public static JavaSparkContext sc = SparkUtil.getSparkContext();

    /** Top 5 of the big-data course, written with explicit anonymous Function classes. */
    @Test
    public void testBigDataTop5()
    {
        // 1. Read the raw data, one tab-separated record per line.
        JavaRDD<String> rdd = sc.textFile("datas/input/scoreSpark/result_bigdata.txt");
        // 2. Split each line into its fields.
        Function<String, String[]> splitter = new Function<String, String[]>()
        {
            @Override
            public String[] call(String line) throws Exception
            {
                return line.split("\t");
            }
        };
        JavaRDD<String[]> fields = rdd.map(splitter);
        // 3. Convert each field array into an (id, name, score) tuple.
        Function<String[], Tuple3<String, String, Integer>> toTuple =
                new Function<String[], Tuple3<String, String, Integer>>()
        {
            @Override
            public Tuple3<String, String, Integer> call(String[] parts) throws Exception
            {
                return new Tuple3<>(parts[0], parts[1], Integer.parseInt(parts[2]));
            }
        };
        JavaRDD<Tuple3<String, String, Integer>> tuples = fields.map(toTuple);
        // 4. Sort descending by the score (third tuple element), single partition.
        Function<Tuple3<String, String, Integer>, Integer> scoreOf =
                new Function<Tuple3<String, String, Integer>, Integer>()
        {
            @Override
            public Integer call(Tuple3<String, String, Integer> t) throws Exception
            {
                return t._3();
            }
        };
        JavaRDD<Tuple3<String, String, Integer>> sorted = tuples.sortBy(scoreOf, false, 1);
        // 5. Take the first 5 and print them.
        List<Tuple3<String, String, Integer>> top5 = sorted.take(5);
        top5.forEach(System.out::println);
    }

    /** Top 5 of the big-data course, with the split and tuple steps merged into one Function. */
    @Test
    public void testBigDataTop51()
    {
        // 1. Read the raw data.
        JavaRDD<String> rdd = sc.textFile("datas/input/scoreSpark/result_bigdata.txt");
        // 2. Parse each line straight into a tuple; malformed lines map to null.
        Function<String, Tuple3<String, String, Integer>> parse =
                new Function<String, Tuple3<String, String, Integer>>()
        {
            @Override
            public Tuple3<String, String, Integer> call(String line) throws Exception
            {
                String[] parts = line.split("\t");
                if (parts.length == 3)
                {
                    return new Tuple3<>(parts[0], parts[1], Integer.parseInt(parts[2]));
                }
                return null;
            }
        };
        // BUG FIX: drop the nulls produced for malformed lines before sorting,
        // otherwise the sortBy key extractor below would throw a NullPointerException.
        JavaRDD<Tuple3<String, String, Integer>> tuples = rdd.map(parse).filter(Objects::nonNull);
        // 3. Sort descending by score.
        Function<Tuple3<String, String, Integer>, Integer> scoreOf =
                new Function<Tuple3<String, String, Integer>, Integer>()
        {
            @Override
            public Integer call(Tuple3<String, String, Integer> t) throws Exception
            {
                return t._3();
            }
        };
        JavaRDD<Tuple3<String, String, Integer>> sorted = tuples.sortBy(scoreOf, false, 1);
        // 4. Take the first 5 and print them.
        List<Tuple3<String, String, Integer>> top5 = sorted.take(5);
        top5.forEach(System.out::println);
    }

    /** Top 5 of the big-data course, lambda / method-reference style. */
    @Test
    public void testBigDataTop511()
    {
        // 1. Read the raw data.
        JavaRDD<String> rdd = sc.textFile("datas/input/scoreSpark/result_bigdata.txt");
        // 2. Parse each line into a tuple; malformed lines map to null.
        Function<String, Tuple3<String, String, Integer>> parse = line ->
        {
            String[] parts = line.split("\t");
            if (parts.length == 3)
            {
                return new Tuple3<>(parts[0], parts[1], Integer.parseInt(parts[2]));
            }
            return null;
        };
        // BUG FIX: filter out nulls so Tuple3::_3 below cannot NPE on malformed lines.
        JavaRDD<Tuple3<String, String, Integer>> tuples = rdd.map(parse).filter(Objects::nonNull);
        // 3. Sort descending by score.
        JavaRDD<Tuple3<String, String, Integer>> sorted = tuples.sortBy(Tuple3::_3, false, 1);
        // 4. Take the first 5 and print them.
        List<Tuple3<String, String, Integer>> top5 = sorted.take(5);
        top5.forEach(System.out::println);
    }

    /** Top 5 of the math course. */
    @Test
    public void testMathTop5()
    {
        // 1. Read the raw data.
        JavaRDD<String> rdd = sc.textFile("datas/input/scoreSpark/result_math.txt");
        // 2. Parse each line into a tuple; malformed lines map to null.
        Function<String, Tuple3<String, String, Integer>> parse = line ->
        {
            String[] parts = line.split("\t");
            if (parts.length == 3)
            {
                return new Tuple3<>(parts[0], parts[1], Integer.parseInt(parts[2]));
            }
            return null;
        };
        // BUG FIX: filter out nulls so Tuple3::_3 below cannot NPE on malformed lines.
        JavaRDD<Tuple3<String, String, Integer>> tuples = rdd.map(parse).filter(Objects::nonNull);
        // 3. Sort descending by score.
        JavaRDD<Tuple3<String, String, Integer>> sorted = tuples.sortBy(Tuple3::_3, false, 1);
        // 4. Take the first 5 and print them.
        List<Tuple3<String, String, Integer>> top5 = sorted.take(5);
        top5.forEach(System.out::println);
    }

    /** Top 5 of the math course (duplicate of {@link #testMathTop5()}, kept for the caller contract). */
    @Test
    public void testMathTop5_1()
    {
        // 1. Read the raw data.
        JavaRDD<String> rdd = sc.textFile("datas/input/scoreSpark/result_math.txt");
        // 2. Parse each line into a tuple; malformed lines map to null.
        Function<String, Tuple3<String, String, Integer>> parse = line ->
        {
            String[] parts = line.split("\t");
            if (parts.length == 3)
            {
                return new Tuple3<>(parts[0], parts[1], Integer.parseInt(parts[2]));
            }
            return null;
        };
        // BUG FIX: filter out nulls so Tuple3::_3 below cannot NPE on malformed lines.
        JavaRDD<Tuple3<String, String, Integer>> tuples = rdd.map(parse).filter(Objects::nonNull);
        // 3. Sort descending by score.
        JavaRDD<Tuple3<String, String, Integer>> sorted = tuples.sortBy(Tuple3::_3, false, 1);
        // 4. Take the first 5 and print them.
        List<Tuple3<String, String, Integer>> top5 = sorted.take(5);
        top5.forEach(System.out::println);
    }

    /** Ids of the students who scored 100 in BOTH courses. */
    @Test
    public void testDouble100Id()
    {
        JavaRDD<String> bigdata = sc.textFile("datas/input/scoreSpark/result_bigdata.txt");
        JavaRDD<String> math = sc.textFile("datas/input/scoreSpark/result_math.txt");
        //@formatter:off
        // Ids with a 100 score in the big-data course.
        // (Typed as JavaRDD<String>, not JavaRDD<Object>, so it can be intersected
        // with math100Id below.)
        JavaRDD<String> bigdata100Id = bigdata.map(line -> line.split("\t"))
                .map(parts -> new Tuple3<>(parts[0], parts[1], Integer.parseInt(parts[2])))
                .filter(t -> t._3() == 100)
                .map(t -> t._1());
        // Ids with a 100 score in the math course.
        JavaRDD<String> math100Id = math.map(line -> line.split("\t"))
                .map(parts -> new Tuple3<>(parts[0], parts[1], Integer.parseInt(parts[2])))
                .filter(t -> t._3() == 100)
                .map(t -> t._1());
        //@formatter:on
        // BUG FIX: the original intersected bigdata100Id with ITSELF, which just
        // returns the big-data 100-score ids. The "both courses" answer is the
        // intersection of the two per-course id sets.
        bigdata100Id.intersection(math100Id).collect().forEach(System.out::println);
    }
}
// TODO(review): stray non-Java text removed from after the class body (it broke compilation).
// Original note (translated): "Extend the code above to compute the ids of students who scored
// 100 in both courses, reading the input via an HDFS HA (nameservice) path instead of a local
// path" — the both-courses query is implemented in testDouble100Id(); switching to HDFS HA means
// configuring dfs.nameservices / failover proxy settings and using an hdfs://<nameservice>/ URI.