import java.sql.Connection;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
public class ToMysql {

    /**
     * Reads a tab-separated text file with Spark, de-duplicates rows by a
     * composite key, converts them into a typed DataFrame, and appends the
     * result to a MySQL table over JDBC.
     */
    public static void main(String[] args) {
        // Dedup set. NOTE(review): this set lives in the driver JVM and is
        // serialized into each task's closure, so de-duplication is only
        // global here because textFile(..., 1) forces a single partition
        // below. For a truly distributed job, replace this with
        // reduceByKey/dropDuplicates on the key columns.
        final Set<String> set = new HashSet<String>();

        String url = "jdbc:mysql://********:3306/bigdata?useSSL=false";

        // JDBC sink credentials and driver class for DataFrameWriter.jdbc().
        Properties properties = new Properties();
        properties.put("user", "user");
        properties.put("password", "password");
        properties.put("driver", "com.mysql.jdbc.Driver");

        SparkSession spark = SparkSession.builder()
                .appName("Spark2")
                .master("local[4]")
                .getOrCreate();
        try {
            // Map each raw line to itself the first time its composite key
            // is seen; duplicates and rows without exactly 21 fields map to
            // null and are removed by the filter below.
            JavaRDD<String> input = spark.sparkContext()
                    .textFile("path", 1).toJavaRDD().map(new Function<String, String>() {
                        public String call(String s) throws Exception {
                            String[] tmps = s.split("\t");
                            if (tmps.length == 21) {
                                // Composite dedup key built from selected columns.
                                String label = tmps[16] + "\t" + tmps[19] + "\t" + tmps[5] + "\t"
                                        + tmps[17] + "\t" + tmps[6] + "\t" + tmps[9] + "\t" + tmps[4];
                                if (set.add(label)) { // add() returns false for duplicates
                                    return s;
                                }
                            }
                            return null;
                        }
                    });

            // Drop nulls (duplicates/malformed rows) and rows whose column 14
            // is empty — it must parse as an int ("source") in the map below.
            JavaRDD<Row> inputRows = input.filter(new Function<String, Boolean>() {
                public Boolean call(String s) throws Exception {
                    if (s == null) {
                        return false;
                    }
                    String[] strs = s.split("\t");
                    return !"".equals(strs[14]);
                }
            }).map(new Function<String, Row>() {
                public Row call(String s) throws Exception {
                    // NOTE(review): Integer.valueOf throws NumberFormatException
                    // on non-numeric data; assumes columns 0, 3, 11, 14 and 18
                    // always hold integers — confirm against the source file.
                    String[] tmp = s.split("\t");
                    return RowFactory.create(Integer.valueOf(tmp[0]), tmp[1], tmp[2], Integer.valueOf(tmp[3]),
                            tmp[4], tmp[5], tmp[6], tmp[7], tmp[8], tmp[9], tmp[10], Integer.valueOf(tmp[11]),
                            tmp[12], tmp[13], Integer.valueOf(tmp[14]), tmp[15],
                            tmp[16], tmp[17], Integer.valueOf(tmp[18]), tmp[19], tmp[20]);
                }
            });

            // Target table schema; field order must match RowFactory.create above.
            List<StructField> structFields = new ArrayList<StructField>();
            structFields.add(DataTypes.createStructField("id", DataTypes.IntegerType, true));
            structFields.add(DataTypes.createStructField("create_time", DataTypes.StringType, true));
            structFields.add(DataTypes.createStructField("ruuid", DataTypes.StringType, true));
            structFields.add(DataTypes.createStructField("version", DataTypes.IntegerType, true));
            structFields.add(DataTypes.createStructField("addition", DataTypes.StringType, true));
            structFields.add(DataTypes.createStructField("bookname", DataTypes.StringType, true));
            structFields.add(DataTypes.createStructField("content", DataTypes.StringType, true));
            structFields.add(DataTypes.createStructField("dversion", DataTypes.StringType, true));
            structFields.add(DataTypes.createStructField("grade", DataTypes.StringType, true));
            structFields.add(DataTypes.createStructField("message", DataTypes.StringType, true));
            structFields.add(DataTypes.createStructField("operatedate", DataTypes.StringType, true));
            structFields.add(DataTypes.createStructField("pageindex", DataTypes.IntegerType, true));
            structFields.add(DataTypes.createStructField("realname", DataTypes.StringType, true));
            structFields.add(DataTypes.createStructField("school", DataTypes.StringType, true));
            structFields.add(DataTypes.createStructField("source", DataTypes.IntegerType, true));
            structFields.add(DataTypes.createStructField("status", DataTypes.StringType, true));
            structFields.add(DataTypes.createStructField("time", DataTypes.StringType, true));
            structFields.add(DataTypes.createStructField("type", DataTypes.StringType, true));
            structFields.add(DataTypes.createStructField("week", DataTypes.IntegerType, true));
            structFields.add(DataTypes.createStructField("userid", DataTypes.StringType, true));
            structFields.add(DataTypes.createStructField("caption", DataTypes.StringType, true));
            StructType structType = DataTypes.createStructType(structFields);

            Dataset<Row> ds = spark.createDataFrame(inputRows, structType);
            // Append the rows into the MySQL table via JDBC.
            ds.write().mode("append").jdbc(url, "tablename", properties);
        } finally {
            // Always release the Spark session, even if the job fails.
            spark.close();
        }
    }
}
// Java Spark 2.1.0: read a text file and write it into MySQL.
// (Originally published as a blog post; latest recommended article dated 2025-05-18 23:33:02.)