// Find the three most frequent words across all sentences in the collection.
val list = List("hello amy" , "from hadoop" ,"hello from","hello hadoop")
val top3 = {
  // Break every sentence into its space-separated words.
  val words = list.flatMap(sentence => sentence.split(" "))
  // Bucket identical words together, then replace each bucket by its size.
  val countsByWord = words
    .groupBy(word => word)
    .map { case (word, occurrences) => (word, occurrences.size) }
  // A Map has no defined order, so move to a List before ranking.
  // Highest count first; the sort is stable, so ties keep the map's iteration order.
  val ranked = countsByWord.toList.sortWith { (left, right) => left._2 > right._2 }
  ranked.take(3)
}
// Total up the counts recorded for each word.
val list1 = List("hello" -> 5 , "hi" -> 3 ,"hello" -> 6)
val groupBy = list1.groupBy(pair => pair._1)
// The grouped values are still (word, count) tuples, which cannot be flattened,
// so project each one to its count and add the counts up per word.
groupBy.map { case (word, pairs) =>
  // Working on the bare Int counts keeps the element type and the result type the same.
  word -> pairs.map(_._2).sum
}