@羲凡——只为了更好的活着
Spark2.0 插入/更新mysql数据(scala)
特别强调楼主使用spark2.3.2版本
1.准备工作
在pom.xml文件中要添加
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.38</version>
</dependency>
2.注意事项
a.向mysql中插入或更新数据,循环时要使用foreachPartition,可以大幅减少mysql的连接数
b.代码中最好加上 classOf[com.mysql.jdbc.Driver] ,不加有时会报错
3.不啰嗦直接上代码
package csdn
import java.sql.{Connection, DriverManager}
import java.util.Properties
import org.apache.spark.sql.SparkSession
object SparkCURDMysql {
case class StuInfo(name: String, age: Int)
def main(args: Array[String]): Unit = {
val spark = SparkSession.builder()
.appName("SparkCURDMysql")
.master("local[*]")
.enableHiveSupport()
.getOrCreate()
//mysql的参数设置
val url = "jdbc:mysql://deptest20:3306/test_data"
val user = "root"
val password = "123456"
val props = new Properties()
props.put("user", user)
props.put("password", password)
props.setProperty("useSSL", "false")
props.setProperty("useUnicode", "true")
props.setProperty("characterEncoding", "utf8")
var connection: Connection = null
import spark.implicits._
// 插入mysql的dataframe(减少分区数)
val insertDF = Seq(StuInfo("盖伦", 18), StuInfo("扎克", 27)
).toDF().coalesce(1)
// 按分区插入到mysql中
insertDF.foreachPartition(iter => {
try {
classOf[com.mysql.jdbc.Driver]
connection = DriverManager.getConnection(url, props)
while (iter.hasNext) {
val row = iter.next()
val name = row.getAs[String]("name")
val age = row.getAs[Int]("age")
val sql = s"insert into test_data.mysql_stu_info(name,age) values('$name',$age)"
connection.createStatement().executeUpdate(sql)
println(s"=====$name insert succeed !======")
}
} catch {
case e: Exception => println(e.printStackTrace())
} finally {
connection.close()
}
})
println("=============线程等待3秒=============")
Thread.sleep(3000)
// 更新mysql的dataframe(减少分区数)
val updateDF = Seq(StuInfo("盖伦", 188), StuInfo("扎克", 227)
).toDF().coalesce(1)
// 按分区更新到mysql中
updateDF.foreachPartition(iter => {
try {
classOf[com.mysql.jdbc.Driver]
connection = DriverManager.getConnection(url, props)
while (iter.hasNext) {
val row = iter.next()
val name = row.getAs[String]("name")
val age = row.getAs[Int]("age")
val sql = s"update test_data.mysql_stu_info set age = $age where name='$name'"
connection.createStatement().executeUpdate(sql)
println(s"=====$name update succeed !======")
}
} catch {
case e: Exception => println(e.printStackTrace())
} finally {
connection.close()
}
})
spark.stop()
}
}
4.结果展示
====================================================================
@羲凡——只为了更好的活着
若对博客中有任何问题,欢迎留言交流