利用Spark SQL的DataFrame将HBase表数据保存为CSV或者Parquet格式文件。
代码:
package com.cbp.spark_hbase
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.log4j.{
Level, Logger}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{
DataFrame, Row, SparkSession}
import org.apache.spark.sql.types.{
StringType, StructField, StructType}
import scala.collection.mutable.ArrayBuffer
object SparkReadHbaseSaveCsvOrParquet {
Logger.getLogger("org").setLevel(Level.INFO)
def main(