教程:
多种类型数据源加载和保存
// $example on:generic_load_save_functions$
// Generic load/save: no format(...) is named on either side, so both the read
// and the write rely on the session's default data source
// (see spark.sql.sources.default).
Dataset<Row> usersDF =
  spark.read().load("examples/src/main/resources/users.parquet");
// Project the two columns of interest first, then persist that projection.
Dataset<Row> userNameAndColorCols = usersDF.select("name", "favorite_color");
userNameAndColorCols.write().save("namesAndFavColors.parquet");
// $example off:generic_load_save_functions$
// $example on:manual_load_options$
// Manually specified formats: the input is read with the "json" source and the
// selected columns are written back out with the "parquet" source.
Dataset<Row> peopleDF =
  spark.read()
    .format("json")
    .load("examples/src/main/resources/people.json");
// Keep only name and age before converting the data to Parquet.
Dataset<Row> personNameAndAgeCols = peopleDF.select("name", "age");
personNameAndAgeCols.write()
  .format("parquet")
  .save("namesAndAges.parquet");
// $example off:manual_load_options$
// $example on:manual_load_options_csv$
// CSV read with source-specific options passed as key/value strings.
// The options are independent of one another, so their order does not matter.
Dataset<Row> peopleDFCsv =
  spark.read()
    .format("csv")
    // Treat the first line of the file as the column names.
    .option("header", "true")
    // Fields in people.csv are separated by ';' rather than ','.
    .option("sep", ";")
    // Ask the reader to infer column types from the data.
    .option("inferSchema", "true")
    .load("examples/src/main/resources/people.csv");
// $example off:manual_load_options_csv$
// $example on:direct_sql$
Dataset<Row> sqlDF =
spark.sql("SELECT * FROM parquet.`examples/src/main/re