数据源:
http://bigdata.zpark.cn/laozhang
http://bigdata.zpark.cn/laozhao
http://bigdata.zpark.cn/laozhao
http://bigdata.zpark.cn/laozhao
http://bigdata.zpark.cn/laozhao
http://bigdata.zpark.cn/laozhao
http://bigdata.zpark.cn/laoduan
http://bigdata.zpark.cn/laoduan
http://javaee.zpark.cn/xiaoxu
http://javaee.zpark.cn/xiaoxu
http://javaee.zpark.cn/laoyang
http://javaee.zpark.cn/laoyang
http://javaee.zpark.cn/laoyang
http://bigdata.zpark.cn/laozhao
http://bigdata.zpark.cn/laozhao
http://bigdata.zpark.cn/laozhao
http://bigdata.zpark.cn/laozhao
http://bigdata.zpark.cn/laozhao
http://bigdata.zpark.cn/laoduan
http://bigdata.zpark.cn/laoduan
http://javaee.zpark.cn/xiaoxu
http://javaee.zpark.cn/xiaoxu
http://javaee.zpark.cn/laoyang
http://javaee.zpark.cn/laoyang
http://javaee.zpark.cn/laoyang
http://bigdata.zpark.cn/laozhao
http://bigdata.zpark.cn/laozhao
http://bigdata.zpark.cn/laozhao
http://bigdata.zpark.cn/laozhao
http://bigdata.zpark.cn/laozhao
http://bigdata.zpark.cn/laoduan
http://bigdata.zpark.cn/laoduan
http://javaee.zpark.cn/xiaoxu
http://javaee.zpark.cn/xiaoxu
http://javaee.zpark.cn/laoyang
http://javaee.zpark.cn/laoyang
http://javaee.zpark.cn/laoyang
http://php.zpark.cn/laoli
http://php.zpark.cn/laoliu
http://php.zpark.cn/laoli
http://php.zpark.cn/laoli
Scala代码:
import java.net.URL
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Spark job that prints, for each configured subject, the three people with the most visits.
 *
 * Each input line is expected to look like `http://<subject>.<domain>/<person>`: the subject is
 * the first dot-separated label of the URL host and the person is the text after the last `/`.
 *
 * Usage: `FavPerson2Filter <input-path>`
 */
object FavPerson2Filter {
  import scala.util.Try

  // Subjects that are reported on; records for any other subject are counted but never printed.
  private val subjects = Array("bigdata", "php", "javaee")

  // Number of top entries printed per subject.
  private val TopN = 3

  /**
   * Extracts the `(subject, person)` key from one input line.
   *
   * @param line a raw input line; surrounding whitespace is ignored
   * @return `Some((subject, person))` when the line has a `/`, a parsable URL prefix and
   *         non-empty subject and person parts; `None` for blank or malformed lines
   */
  private def parseSubjectAndPerson(line: String): Option[(String, String)] = {
    val trimmed = line.trim
    val index = trimmed.lastIndexOf("/")
    // No slash at all: substring(0, -1) would throw, so reject the line up front.
    if (index < 0) None
    else {
      val person = trimmed.substring(index + 1)
      for {
        // new URL(...) throws MalformedURLException on bad input; treat that as "skip this line".
        host <- Try(new URL(trimmed.substring(0, index)).getHost).toOption
        subject <- host.split("\\.").headOption
        if subject.nonEmpty && person.nonEmpty
      } yield (subject, person)
    }
  }

  /**
   * Entry point.
   *
   * @param args `args(0)` is the path of the input file to read with `SparkContext.textFile`
   */
  def main(args: Array[String]): Unit = {
    if (args.isEmpty) {
      System.err.println("Usage: FavPerson2Filter <input-path>")
      sys.exit(1)
    }

    // Fall back to local[4] only when no master was supplied (e.g. via spark-submit --master).
    val conf = new SparkConf()
      .setAppName("FavPerson2Filter")
      .setIfMissing("spark.master", "local[4]")
    val sc = new SparkContext(conf)

    try {
      val lines: RDD[String] = sc.textFile(args(0))

      // Blank and malformed lines are dropped instead of failing the whole job.
      val subjectPersonAndOne: RDD[((String, String), Int)] =
        lines.flatMap(line => parseSubjectAndPerson(line).map(key => (key, 1)))

      // Cached because one job per subject is run against it below.
      val reduced: RDD[((String, String), Int)] = subjectPersonAndOne.reduceByKey(_ + _).cache()

      val byCount: Ordering[((String, String), Int)] =
        Ordering.by[((String, String), Int), Int](_._2)

      for (sb <- subjects) {
        // top() returns the largest elements in descending order without a full sort shuffle.
        val tuples: Array[((String, String), Int)] =
          reduced.filter(_._1._1 == sb).top(TopN)(byCount)
        // println so that each subject's result ends up on its own line.
        println(tuples.toBuffer)
      }
    } finally {
      // Stop the context even when a job fails.
      sc.stop()
    }
  }
}