Hands-on with Spark RDD Methods

The following standalone program demonstrates three common transformations on pair RDDs: union, filter, and distinct. (The original listing was cut off mid-statement; the final filter is completed here following the >= 100 pattern used throughout.)

package com.tipdm.sparkDemo

import org.apache.spark.{SparkConf, SparkContext}

object a1 {
  def main(args: Array[String]): Unit = {
    // Run locally with a single worker thread.
    val conf = new SparkConf().setAppName("WordCount").setMaster("local")
    val sc = new SparkContext(conf)

    // union concatenates two RDDs without removing duplicates.
    val rdd1 = sc.parallelize(List(('a', 1), ('b', 99), ('c', 100), ('d', 101)))
    val rdd2 = sc.parallelize(List(('e', 120), ('f', 150)))
    val rdd3 = rdd1.union(rdd2)

    // filter keeps only the pairs whose value is at least 100.
    rdd3.filter(_._2 >= 100).collect()  // result discarded here; shown interactively in spark-shell
    rdd3.filter(x => x._2 >= 100).collect().foreach(println)

    // distinct removes the duplicate ('c', 100) before filtering.
    val rdd4 = sc.parallelize(List(('a', 1), ('b', 99), ('c', 100), ('d', 101), ('c', 100)))
    rdd4.filter(_._2 >= 100).collect()
    val rdd5 = rdd4.distinct()
    rdd5.filter(x => x._2 >= 100).collect().foreach(println)

    sc.stop()
  }
}
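Running the program should first print the four pairs from rdd3 with values of at least 100, i.e. (c,100), (d,101), (e,120) and (f,150), followed by the filtered contents of rdd5. Note that distinct() shuffles the data, so the order of rdd5's output is not guaranteed. If a deterministic order is wanted, the pairs can be sorted by key before printing; a minimal sketch (the sortByKey call is an addition for illustration, not part of the original listing):

    // Sorting after distinct makes the output order deterministic.
    rdd5.filter(_._2 >= 100).sortByKey().collect().foreach(println)
    // expected output:
    // (c,100)
    // (d,101)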