Scala version of the algorithm:
package com.bbw5.dataalgorithms.spark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

import scala.collection.mutable.HashMap

/**
 * The FindCommonFriends is a Spark program to find "common friends"
 * between all users.
 * @author baibaiw5
 */
object SparkFindCommonFriends {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("SparkFindCommonFriends")
    val sc = new SparkContext(sparkConf)

    // Sample input: each record is "userId,friend1 friend2 ...".
    val data = sc.parallelize(Seq("100,200 300 400 500 600", "200,100 300 400", "300,100 200 400 500",
      "400,100 200 300", "500,100 300", "600,100"), 2)

    // For every (user, friend) relation, emit the ordered pair (smaller id, larger id)
    // as the key and the user's full friend list as the value. Each pair of connected
    // users therefore ends up with two friend lists grouped under the same key.
    val friendsRdd = data.flatMap { l =>
      val a = l.split(",")
      val id = a(0).toInt
      val friends = a(1).split(" ").filter { x => x.trim().length() > 0 }.map(_.toInt).toList
      friends.map { f =>
        if (f < id) (f -> id) -> friends else (id -> f) -> friends
      }
    }
    friendsRdd.collect().foreach(println)

    // Count how many of the grouped lists contain each candidate friend; a friend is
    // common to the pair only if it appears in every list (count == number of lists).
    val commonFriends = friendsRdd.groupByKey().mapValues { x =>
      val counts = x.foldLeft(new HashMap[Int, Int]()) { (map, friends) =>
        friends.foreach(f => map.put(f, map.getOrElse(f, 0) + 1))
        map
      }
      counts.filter(_._2 == x.size).map(_._1)
    }
    commonFriends.collect().foreach(println)

    sc.stop()
  }
}
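
To sanity-check the pairing and grouping idea without a Spark cluster, the standalone sketch below runs the same sample input through plain Scala collections. It is only an illustration: the object name CommonFriendsSketch is made up, and it replaces the count-based filter of the Spark version with an equivalent set intersection over the grouped friend lists.

// Standalone sketch of the core idea (plain Scala, no Spark), using the same sample input.
// Each line "id,f1 f2 ..." is expanded into ((min(id, f), max(id, f)) -> friend list of id);
// after grouping by the pair, a friend common to both users appears in every list for that key.
object CommonFriendsSketch {
  def main(args: Array[String]): Unit = {
    val input = Seq("100,200 300 400 500 600", "200,100 300 400", "300,100 200 400 500",
      "400,100 200 300", "500,100 300", "600,100")
    val expanded = input.flatMap { l =>
      val Array(idStr, rest) = l.split(",")
      val id = idStr.toInt
      val friends = rest.split(" ").filter(_.trim.nonEmpty).map(_.toInt).toList
      friends.map(f => (if (f < id) (f, id) else (id, f)) -> friends)
    }
    val common = expanded.groupBy(_._1).map { case (pair, entries) =>
      // Intersect all friend lists that share this (user, user) key; this is equivalent
      // to keeping only the friends whose count equals the number of grouped lists.
      pair -> entries.map(_._2.toSet).reduce(_ intersect _)
    }
    common.toSeq.sortBy(_._1).foreach(println)
    // For this input, the (100,200) pair yields the set {300, 400}, i.e. the common
    // friends of users 100 and 200, matching what the Spark job prints for that key.
  }
}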