Java Spark2.1.0 读取文本写入MySQL

本文介绍如何使用 Java 和 Spark 2.1.0 读取以制表符分隔的文本文件,经去重、过滤和类型转换后,通过 JDBC 以追加方式写入 MySQL 数据表,内容包括 JDBC 连接配置、数据读取、过滤、转换以及最终写入数据库的过程。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

import java.util.HashSet;
import java.util.Set;
import java.sql.Connection;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;

public class ToMysql {
    public static void main(String[] args) {

        final Set<String> set = new HashSet<String>();
        String driver = "com.mysql.jdbc.Driver";
        String url = "jdbc:mysql://********:3306/bigdata?useSSL=false";
        Properties properties = new Properties();
        properties.put("user","user");
        properties.put("password","password");
        properties.put("driver","com.mysql.jdbc.Driver");

        SparkSession spark = SparkSession.builder().appName("Spark2").master("local[4]").getOrCreate();
        JavaRDD<String> input = spark.sparkContext()
                .textFile("path",1).toJavaRDD().map(new Function<String, String>() {
                    public String call(String s) throws Exception {
                        String[] tmps = s.split("\t");
                        if(tmps.length == 21){
                            String label = tmps[16] + "\t" + tmps[19] + "\t" + tmps[5] + "\t" + tmps[17] + "\t" + tmps[6]
                                    + "\t" + tmps[9] + "\t" + tmps[4];
                            if(!set.contains(label)){
                                set.add(label);
                                return s;
                            }

                        }
                        return null;
                    }
                });
        JavaRDD<Row> inputRows = input.filter(new Function<String, Boolean>() {
            public Boolean call(String s) throws Exception {
                if(null == s){
                    return false;
                }
                String[] strs = s.split("\t");
                if("".equals(strs[14])){
                    return false;
                }
                return true;
            }
        }).map(new Function<String, Row>() {
            public Row call(String s) throws Exception {
                String[] tmp = s.split("\t");
                return RowFactory.create(Integer.valueOf(tmp[0]),tmp[1],tmp[2],Integer.valueOf(tmp[3]),
                        tmp[4],tmp[5],tmp[6],tmp[7],tmp[8],tmp[9],tmp[10],Integer.valueOf(tmp[11]),tmp[12],
                        tmp[13],Integer.valueOf(tmp[14]),tmp[15],
                        tmp[16],tmp[17],Integer.valueOf(tmp[18]),tmp[19],tmp[20]);
            }
        });
        List structFields = new ArrayList();
        structFields.add(DataTypes.createStructField("id",DataTypes.IntegerType,true));
        structFields.add(DataTypes.createStructField("create_time",DataTypes.StringType,true));
        structFields.add(DataTypes.createStructField("ruuid",DataTypes.StringType,true));
        structFields.add(DataTypes.createStructField("version",DataTypes.IntegerType,true));
        structFields.add(DataTypes.createStructField("addition",DataTypes.StringType,true));
        structFields.add(DataTypes.createStructField("bookname",DataTypes.StringType,true));
        structFields.add(DataTypes.createStructField("content",DataTypes.StringType,true));
        structFields.add(DataTypes.createStructField("dversion",DataTypes.StringType,true));
        structFields.add(DataTypes.createStructField("grade",DataTypes.StringType,true));
        structFields.add(DataTypes.createStructField("message",DataTypes.StringType,true));
        structFields.add(DataTypes.createStructField("operatedate",DataTypes.StringType,true));
        structFields.add(DataTypes.createStructField("pageindex",DataTypes.IntegerType,true));
        structFields.add(DataTypes.createStructField("realname",DataTypes.StringType,true));
        structFields.add(DataTypes.createStructField("school",DataTypes.StringType,true));
        structFields.add(DataTypes.createStructField("source",DataTypes.IntegerType,true));
        structFields.add(DataTypes.createStructField("status",DataTypes.StringType,true));
        structFields.add(DataTypes.createStructField("time",DataTypes.StringType,true));
        structFields.add(DataTypes.createStructField("type",DataTypes.StringType,true));
        structFields.add(DataTypes.createStructField("week",DataTypes.IntegerType,true));
        structFields.add(DataTypes.createStructField("userid",DataTypes.StringType,true));
        structFields.add(DataTypes.createStructField("caption",DataTypes.StringType,true));

        StructType structType = DataTypes.createStructType(structFields);
        Dataset<Row> ds = spark.createDataFrame(inputRows,structType);
        ds.write().mode("append").jdbc(url,"tablename",properties);
        spark.close();

    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值