package xiangqi_spark.YeWu

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import xiangqi_spark.util_scala.YearMonthDay2Timestamp

import scala.collection.mutable

/**
  * Created by Administrator on 2018/1/3.
  */
object LaoTie {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("trip_test").setMaster("local[*]")
    conf.set("spark.testing.memory", "1847483648")
    val sc = new SparkContext(conf)

    // val line: RDD[String] = sc.textFile("E:\\work-xq\\项目\\24相邻订单小于10分钟次数\\trip.txt")
    val line: RDD[String] = sc.textFile("E:\\work-xq\\项目\\24相邻订单小于10分钟次数\\666.txt")

    // Each input line: trip_id,user_id,city_id,start_time,time_consume
    val datardd: RDD[(String, String, String, String)] = line.map { x =>
      val data: Array[String] = x.split(",")
      val trip_id = data(0)
      val user_id = data(1)
      val city_id = data(2)
      val start_time = data(3)
      val time_consume = data(4)
      (user_id + "," + city_id, start_time, time_consume, trip_id)
    }

    // Group all trips by the composite key "user_id,city_id"
    val rdd1: RDD[(String, Iterable[(String, String, String, String)])] = datardd.groupBy(_._1)

    val values: RDD[(String, Int)] = rdd1.mapValues { value =>
      // Keep (start_time, time_consume, trip_id) per trip
      val trips: Iterable[(String, String, String)] = value.map(x => (x._2, x._3, x._4))
      // Sort each user's trips chronologically by start time
      val seq: Seq[(String, String, String)] =
        trips.toSeq.sortBy(y => YearMonthDay2Timestamp.stringToTimestamp(y._1))
      // Collect the trip ids of adjacent orders whose time_consume is both under 10 minutes
      val res: mutable.Set[String] = mutable.Set()
      for (i <- 1 until seq.length) {
        if (seq(i - 1)._2.toInt < 10 && seq(i)._2.toInt < 10) {
          res.add(seq(i - 1)._3)
          res.add(seq(i)._3)
        }
      }
      res.size
    }

    // Re-key by city_id and sum the per-user counts for each city
    val map: RDD[(String, Int)] = values.map { x =>
      val keyArray: Array[String] = x._1.split(",")
      val key = keyArray(1)
      (key, x._2)
    }
    map.reduceByKey(_ + _).foreach(println)
  }
}
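The sort above relies on a project-local utility, YearMonthDay2Timestamp.stringToTimestamp, whose source is not included in this listing. Below is a minimal sketch of what such a helper could look like, assuming the start_time column is formatted as "yyyy-MM-dd HH:mm:ss"; the actual implementation in xiangqi_spark.util_scala may differ.

object YearMonthDay2Timestamp {
  // Parses a start_time string such as "2018-01-03 12:30:00" into epoch milliseconds
  // so trips can be sorted chronologically. The date pattern here is an assumption.
  def stringToTimestamp(time: String): Long = {
    val format = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    format.parse(time).getTime
  }
}

With that helper in place, each group's trips are ordered by start time before adjacent pairs are compared, and the final reduceByKey prints one (city_id, count) pair per city.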