PairRDD方法:(针对键值对的RDD方法)
groupByKey():根据Key对Value分组
scala> val rdd1=sc.makeRDD(List(("A",2),("B",1),("A",5),("C",2),("B",2)))
scala> rdd1.groupByKey().collect
res15: Array[(String, Iterable[Int])] = Array((A,CompactBuffer(2, 5)), (B,CompactBuffer(1, 2)), (C,CompactBuffer(2)))
sortByKey():以Key为依据进行排序(对Key排序)
scala> rdd1.sortByKey().collect //默认升序
res16: Array[(String, Int)] = Array((A,2), (A,5), (B,1), (B,2), (C,2))
scala> rdd1.sortByKey(false).collect //降序
res17: Array[(String, Int)] = Array((C,2), (B,1), (B,2), (A,2), (A,5))
reduceByKey():对具有相同Key的Value,用指定的二元函数两两归约,每个Key最终得到一个结果值
scala> rdd1.reduceByKey((x,y)=>x+y).collect
res18: Array[(String, Int)] = Array((A,7), (B,3), (C,2))
mapValues():对每个Value做指定函数运算,Key保持不变,元素个数不变
scala> rdd1.mapValues(x=>x+10).collect
res24: Array[(String, Int)] = Array((A,12), (B,11), (A,15), (C,12), (B,12))
scala> val rdd3=sc.makeRDD(List((1,"cat"),(2,"dog")))
scala> rdd3.mapValues(_+"s").collect
res25: Array[(Int, String)] = Array((1,cats), (2,dogs))
flatMapValues():对每个Value应用指定函数,该函数须返回一个集合,集合中的每个元素分别与原Key组成新的键值对(即一个Value可能被映射为若干个新值);注意字符串会被当作字符序列展开,所以下例的结果类型是Char
scala> rdd3.flatMapValues(_+"s").collect
res26: Array[(Int, Char)] = Array((1,c), (1,a), (1,t), (1,s), (2,d), (2,o), (2,g), (2,s))
scala> val rdd4=sc.makeRDD(List(("cat",1),("dog",2)))
scala> rdd4.flatMapValues(x=>x.to(4)).collect
res37: Array[(String, Int)] = Array((cat,1), (cat,2), (cat,3), (cat,4), (dog,2), (dog,3), (dog,4))
groupByKey():根据Key对Value分组
scala> val rdd1=sc.makeRDD(List(("A",2),("B",1),("A",5),("C",2),("B",2)))
scala> rdd1.groupByKey().collect
res15: Array[(String, Iterable[Int])] = Array((A,CompactBuffer(2, 5)), (B,CompactBuffer(1, 2)), (C,CompactBuffer(2)))
sortByKey():以Key为依据进行排序(对Key排序)
scala> rdd1.sortByKey().collect //默认升序
res16: Array[(String, Int)] = Array((A,2), (A,5), (B,1), (B,2), (C,2))
scala> rdd1.sortByKey(false).collect //降序
res17: Array[(String, Int)] = Array((C,2), (B,1), (B,2), (A,2), (A,5))
reduceByKey():对具有相同Key的Value,用指定的二元函数两两归约,每个Key最终得到一个结果值
scala> rdd1.reduceByKey((x,y)=>x+y).collect
res18: Array[(String, Int)] = Array((A,7), (B,3), (C,2))
mapValues():对每个Value做指定函数运算,Key保持不变,元素个数不变
scala> rdd1.mapValues(x=>x+10).collect
res24: Array[(String, Int)] = Array((A,12), (B,11), (A,15), (C,12), (B,12))
scala> val rdd3=sc.makeRDD(List((1,"cat"),(2,"dog")))
scala> rdd3.mapValues(_+"s").collect
res25: Array[(Int, String)] = Array((1,cats), (2,dogs))
flatMapValues():对每个Value应用指定函数,该函数须返回一个集合,集合中的每个元素分别与原Key组成新的键值对(即一个Value可能被映射为若干个新值);注意字符串会被当作字符序列展开,所以下例的结果类型是Char
scala> rdd3.flatMapValues(_+"s").collect
res26: Array[(Int, Char)] = Array((1,c), (1,a), (1,t), (1,s), (2,d), (2,o), (2,g), (2,s))
scala> val rdd4=sc.makeRDD(List(("cat",1),("dog",2)))
scala> rdd4.flatMapValues(x=>x.to(4)).collect
res37: Array[(String, Int)] = Array((cat,1), (cat,2), (cat,3), (cat,4), (dog,2), (dog,3), (dog,4))
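combineByKey():按Key聚合Value的通用方法,需要提供三个函数:createCombiner(在每个分区中首次遇到某个Key时,用其Value创建初始累加器)、mergeValue(在分区内把同一Key的后续Value合并到累加器)、mergeCombiners(合并不同分区中同一Key的累加器)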
欢迎读者加QQ 1204738320交流