创建关系文件

import org.apache.spark.sql.SparkSession

/**
 * Spark job that joins two label files against a three-column triple file and
 * writes the result as a relationship CSV.
 *
 * Inputs (all read without a header, relative to the working directory):
 *  - `./data/rel1.csv` and `./data/rel2.csv`: column 1 is used as the join key,
 *    column 0 as the id carried into the output.
 *  - `./data/ownthink_v2.csv`: rows are expected to have exactly three columns;
 *    column 0 is matched against `rel1` keys, column 2 against `rel2` keys, and
 *    column 1 becomes the `name` column of the output.
 *
 * Output: `./data/finRes` (CSV with header `:START_ID,name,:END_ID,:TYPE`).
 * NOTE(review): the header looks like a Neo4j bulk-import relationship file — confirm.
 */
object JoinLabel {
  // Imported locally so this object stays self-contained.
  import org.apache.spark.rdd.RDD
  import org.apache.spark.sql.Row

  /**
   * Maps label rows to `(column 1, column 0)` pairs.
   *
   * Rows with fewer than two columns, or with a null in either column, are
   * skipped: they cannot take part in a join and would otherwise raise a
   * NullPointerException on `toString`.
   */
  private def keyedLabels(rows: RDD[Row]): RDD[(String, String)] =
    rows
      .filter(row => row.size >= 2 && !row.isNullAt(0) && !row.isNullAt(1))
      .map(row => (row.get(1).toString, row.get(0).toString))

  /**
   * Entry point; runs the whole join pipeline.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Plain ASCII quotes: the typographic quotes in the original do not compile.
    val session = SparkSession.builder().master("local[1]").appName("make_label").getOrCreate()

    try {
      import session.implicits._

      val csvLabel1 = session.read.csv("./data/rel1.csv").rdd
      val csvLabel2 = session.read.csv("./data/rel2.csv").rdd
      val csvOrign = session.read.csv("./data/ownthink_v2.csv").rdd

      // Keep only complete triples. Dropping null columns before the joins
      // avoids NPEs later and shrinks the data that has to be shuffled.
      val completeTriples = csvOrign
        .filter(_.size == 3)
        .filter(row => !row.isNullAt(0) && !row.isNullAt(1) && !row.isNullAt(2))

      val origRdd = completeTriples.map(row => (row.get(0).toString, row))
      val label1 = keyedLabels(csvLabel1)
      val label2 = keyedLabels(csvLabel2)

      // key, (id1, triple)
      val midRes = label1.join(origRdd)

      // Re-key by the triple's third column so it can be matched against rel2.
      val joinRes = midRes.map { case (_, (id1, triple)) =>
        (triple.get(2).toString, (id1, triple))
      }

      // key, (id2, (id1, triple))
      val joinRdd = label2.join(joinRes)

      val finRes = joinRdd.map { case (_, (idRight, (idLeft, triple))) =>
        (idLeft, triple.get(1).toString, idRight, "RELATIONSHIP")
      }

      // :START_ID,name,:END_ID,:TYPE
      val finFrame = finRes.toDF(":START_ID", "name", ":END_ID", ":TYPE")
      finFrame.write.option("header", "true").csv("./data/finRes")
    } finally {
      // Always release Spark resources, even when a read/join/write fails.
      session.stop()
    }
  }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值