1. 如何读取 MySQL 中的数据?
/**
 * Example of reading MySQL tables into Spark DataFrames through the JDBC data
 * source and joining them with Spark SQL.
 */
public class JDBCDataSource {
    /**
     * Loads the {@code student_info} and {@code student_score} tables over JDBC,
     * registers them as temporary tables, joins them on {@code name}, and calls
     * {@code show()} on the rows whose score is greater than 80.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("JDBCDataSource").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            SQLContext sqlContext = new SQLContext(sc);

            // Approach 1 (kept for reference): pass all JDBC settings as an options map.
            /*
             * Map<String, String> options = new HashMap<String, String>();
             * options.put("url", "jdbc:mysql://hadoop1:3306/testdb");
             * options.put("driver", "com.mysql.jdbc.Driver");
             * options.put("user", "spark");
             * options.put("password", "spark2016");
             * options.put("dbtable", "student_info");
             * DataFrame studentInfosDF = sqlContext.read().format("jdbc").options(options).load();
             *
             * options.put("dbtable", "student_score");
             * DataFrame studentScoresDF = sqlContext.read().format("jdbc").options(options).load();
             */

            // Approach 2: configure one DataFrameReader option by option and reuse it
            // for both tables, changing only "dbtable" between the two loads.
            // NOTE(review): the URL and credentials are hard-coded for this example;
            // real code should read them from configuration instead.
            DataFrameReader reader = sqlContext.read().format("jdbc")
                    .option("url", "jdbc:mysql://node4:3306/testdb")
                    .option("driver", "com.mysql.jdbc.Driver")
                    .option("user", "root")
                    .option("password", "123");

            DataFrame studentInfosDF = reader.option("dbtable", "student_info").load();
            DataFrame studentScoresDF = reader.option("dbtable", "student_score").load();

            // Register both DataFrames as temporary tables so they can be joined in SQL.
            studentInfosDF.registerTempTable("studentInfos");
            studentScoresDF.registerTempTable("studentScores");

            String sql = "SELECT studentInfos.name,age,score "
                    + " FROM studentInfos JOIN studentScores"
                    + " ON (studentScores.name = studentInfos.name)"
                    + " WHERE studentScores.score > 80";
            DataFrame highScorers = sqlContext.sql(sql);
            highScorers.show();
        } finally {
            // Always shut the context down, even when loading or querying fails.
            sc.stop();
        }
    }
}