使用Seatunnel进行mysql到http、mysql到mysql、http到mysql以及mysql-cdc到HTTP的数据同步示例。
官方文档:https://seatunnel.apache.org/zh-CN/docs/2.3.7/connector-v2/source/Jdbc
在操作mysql同步之前,需要先下载对应版本的jdbc驱动jar包mysql-connector-java-xxx.jar(https://mvnrepository.com/artifact/mysql/mysql-connector-java)然后放到Seatunnel安装路径下的lib文件夹下。对于Spark/Flink,也应该将jar包复制到 S P A R K H O M E / j a r s / 或 SPARK_HOME/jars/或 SPARKHOME/jars/或FLINK_HOME/lib/。
1,mysql同步到http接口
env {
execution.parallelism = 2
job.mode = "BATCH" #mysql作为数据源,只支持批量同步
}
source {
jdbc {
url = "jdbc:mysql://172.27.10.22:6033/test"
driver = "com.mysql.cj.jdbc.Driver"
connection_check_timeout_sec = 100
user = "root"
password = "root"
query = "SELECT * FROM user_info ORDER BY create_time LIMIT 1"
result_table_name = "user_info_out"
}
}
transform {
Sql {
source_table_name = "user_info_out"
result_table_name = "user_info_sink"
query = "select info, user_name, age from user_info_out"
}
}
sink {
Console {
source_table_name = "user_info_sink"
}
http{
source_table_name = "user_info_sink"
url = "https://test.test.com:8080/api/user/test"
method = "POST"
headers = {Accept="application/json",Content-Type="application/json;charset=utf-8"}
}
}
2,mysql同步到mysql
env {
execution.parallelism = 2
job.mode = "BATCH"
}
source{
Jdbc {
url = "jdbc:mysql://172.27.10.22:6033/test"
driver = "com.mysql.cj.jdbc.Driver"
connection_check_timeout_sec = 10
user = "root"
password = "root"
query = "SELECT `name`,`score` FROM `user`"
result_table_name = "user_info"
}
}
transform {
}
sink {
Jdbc {
source_table_name = "user_info"
url = "jdbc:mysql://192.27.10.22:16033/temp_user"
driver = "com.mysql.cj.jdbc.Driver"
connection_check_timeout_sec = 100
user = "root"
password = "root"
query = "INSERT INTO `student`(`name`,`score`) VALUES(?,?)"
}
}
3,http接口同步到mysql
env {
execution.parallelism = 2
job.mode = "STREAMING" #http作为数据源,支持批量模式和流式模式
checkpoint.interval = 10000 #执行间隔/毫秒
}
source {
Http {
url = "https://test.test.com:8080/api/test"
method = "GET"
format = "json"
headers = {Authorization="Bearer eyJhbGciOiJIUzUxMiJ9.eyJqdGkiOiI1Zjc1NzVjMjI4NDQ0OTFmOTQ1ZWY1ZjI5NzlmMDRiNyIsInN1YiI6InpqdCJ9.49bfdthmvGs2DWKJxvrlz6vtEGckVDfBtKIvFfN1UzHJ-LHboZ2NpPj5bX6XZ1lk7l-O-jh_PnfQ7LLZvCFVc",language="zh"}
params = {userId="fa438165b2c84d8dbe9175d152718437"}
content_field = "$.content.*"
schema = {
fields {
userId= string
age= int
phone= string
name=string
}
}
result_table_name = "user_info"
}
}
transform {
Sql {
source_table_name = "user_info"
result_table_name = "user_info_out"
query = "SELECT name as userName,userId,age,phone FROM user_info"
}
}
sink {
Console {
source_table_name = "user_info_out"
}
Jdbc {
url = "jdbc:mysql://172.27.10.22:26033/test"
driver = "com.mysql.cj.jdbc.Driver"
connection_check_timeout_sec = 100
user = "root"
password = "root"
source_table_name = "user_info_out"
query = "INSERT INTO `user_bak`(`userName`, `userId`, `age`, `phone`) VALUES (?,? ,?,?)"
}
}
4,mysql-cdc同步到http接口
env {
execution.parallelism = 2
job.mode = "STREAMING" #mysql-cdc作为数据源,支持批量模式和流式模式
checkpoint.interval = 10000 #执行间隔/毫秒
}
source {
MySQL-CDC {
catalog = {
factory = MySQL
}
base-url = "jdbc:mysql://"${mysql_ip_port}"/test?serverTimezone=Asia/Shanghai&characterEncoding=utf8&useSSL=false"
username = ${mysql_username}
password = ${mysql_pass}
table-names = ["test.user"]
startup.mode = "initial"
result_table_name = "user_info_out"
table-names-config = [
{
table = "test.user"
primaryKeys = ["user_id"]
}
]
}
}
transform {
FilterRowKind {
source_table_name = "user_info_out"
result_table_name = "user_info_sink"
include_kinds = ["UPDATE_AFTER","INSERT"]
}
}
sink {
Console {
source_table_name = "user_info_sink"
}
http{
source_table_name = "user_info_sink"
url = "https://test.test.com:28080/api/user/test"
method = "POST"
headers = {Accept="application/json",Content-Type="application/json;charset=utf-8"}
}
}