求最短距离--pregel

需求:随机选取一个机场,用 Pregel 求它到其它所有机场的最短(最便宜)航线。
(要点:只要构建出所需的 graph 即可;顶点初始化和 pregel 调用都是固定格式的模板代码。)
航班信息的字段如下(以 ------ 标记的是本例用到的字段):

月中第几天,
周中第几天,
航空公司,
飞机注册号,
航班号,
起飞机场编号,------
起飞机场,-------
到达机场编号,-------
到达机场,--------
预计起飞时间,
起飞时间,
起飞延迟,
预计到达时间,
到达时间,
到达延迟,
预计飞行时间,
飞行距离---------
package graph.etl
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
/**
 * GraphX demo over a flight CSV: builds an airport graph, reports basic
 * statistics, and runs a Pregel single-source shortest-path search where the
 * edge weight is the ticket price derived from the flight distance.
 *
 * Columns read from each CSV row (0-based): 5 = origin airport id,
 * 6 = origin airport name, 7 = destination airport id,
 * 8 = destination airport name, 16 = flight distance.
 */
object FlightDemo {

  // Fare model: price = BaseFare + FarePerDistanceUnit * distance.
  // NOTE(review): the original comment said 18.0 while the code used 180;
  // the code's value is kept here — confirm which one is intended.
  private val BaseFare: Double = 180.0
  private val FarePerDistanceUnit: Double = 0.15

  /**
   * Runs Pregel shortest-path from `sourceId` over `graph`, using the fare
   * derived from each edge's distance as the edge weight.
   *
   * @param graph    airport graph whose edge attribute is the flight distance
   * @param sourceId vertex id of the departure airport
   * @return graph whose vertex attribute is the cheapest total fare from
   *         `sourceId` (`Double.PositiveInfinity` when unreachable)
   */
  private def cheapestFares(graph: Graph[String, Int], sourceId: VertexId): Graph[Double, Double] = {
    // The source starts at cost 0, every other airport at +infinity.
    val initial: Graph[Double, Double] = graph
      .mapVertices((id, _) => if (id == sourceId) 0.0 else Double.PositiveInfinity)
      .mapEdges(e => BaseFare + FarePerDistanceUnit * e.attr.toDouble)

    initial.pregel(Double.PositiveInfinity, Int.MaxValue, EdgeDirection.Out)(
      // Vertex program: keep the cheaper of the known and the incoming fare.
      (_, known, incoming) => math.min(known, incoming),
      // Send a message only when going through src improves dst.
      triplet => {
        val candidate = triplet.srcAttr + triplet.attr
        if (candidate < triplet.dstAttr) Iterator((triplet.dstId, candidate))
        else Iterator.empty
      },
      // Merge concurrent messages by taking the minimum.
      (a, b) => math.min(a, b)
    )
  }

  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local[8]").setAppName("ETL")
    val spark: SparkSession = SparkSession.builder().config(conf).getOrCreate()
    val sc: SparkContext = spark.sparkContext

    try {
      // Load and split the CSV once; both vertices and edges are derived from it.
      val fields: RDD[Array[String]] = sc.textFile("in/flight.csv").map(_.split(",")).cache()

      // Vertices: (airport id, airport name) for both ends of every flight.
      val airPort: RDD[(VertexId, String)] = fields
        .flatMap(x => Array((x(5).toLong, x(6)), (x(7).toLong, x(8))))
        .distinct()

      // Edges: origin id -> destination id, weighted by flight distance.
      val lines: RDD[Edge[Int]] = fields
        .map(x => (x(5).toLong, x(7).toLong, x(16).toInt))
        .distinct()
        .map { case (src, dst, distance) => Edge(src, dst, distance) }

      val graph: Graph[String, Int] = Graph(airPort, lines)

      // Number of airports and routes.
      val numVertices: Long = graph.numVertices
      println("机场数量" + numVertices)
      val numEdges: Long = graph.numEdges
      println("航线数量" + numEdges)

      // Longest route: project to plain values first, then take the maximum
      // by distance instead of fully sorting the triplets.
      graph.triplets
        .map(t => (t.attr, t.srcAttr + " " + t.dstAttr + "距离:" + t.attr))
        .top(1)(Ordering.by[(Int, String), Int](_._1))
        .headOption
        .foreach { case (_, description) => println(description) }

      // Busiest airports by incoming and by outgoing routes.
      graph.inDegrees.sortBy(_._2, ascending = false).take(1).headOption.foreach(println)
      graph.outDegrees.sortBy(_._2, ascending = false).take(1).headOption.foreach(println)

      // Most important airports according to PageRank (tolerance 0.05).
      val ranks: VertexRDD[Double] = graph.pageRank(0.05).vertices
      println(ranks.sortBy(_._2, ascending = false).take(3).toList)

      // Cheapest routes from one randomly chosen airport.
      // takeSample returns exactly one element for a non-empty RDD, unlike
      // sample(fraction), which may legitimately return nothing.
      airPort.takeSample(withReplacement = false, num = 1, seed = 1L).headOption match {
        case Some((srcAirportId, srcAirportName)) =>
          println((srcAirportId, srcAirportName))
          val fares: Graph[Double, Double] = cheapestFares(graph, srcAirportId)
          // Report the three cheapest reachable destinations, excluding the
          // source itself (cost 0) and unreachable airports (cost +infinity).
          val cheapest: Array[(VertexId, Double)] = fares.vertices
            .filter { case (id, fare) => id != srcAirportId && !fare.isInfinity }
            .sortBy(_._2, ascending = true)
            .take(3)
          println(cheapest.toList)
        case None =>
          println("no airports found in in/flight.csv")
      }
    } finally {
      // Release the local Spark resources even if a stage fails.
      spark.stop()
    }
  }
}
`1``
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值