import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.log4j.{Level, LogManager}
import org.apache.spark.{SparkConf, SparkContext}
/**
* Created by owlcabin on 2016/5/27.
*/
object SparkHBase {
  def main(args: Array[String]): Unit = {
    // Initialize the Spark environment
    val sparkConf = new SparkConf().setAppName("SparkHBase")
    val sparkContext = new SparkContext(sparkConf)
    LogManager.getRootLogger.setLevel(Level.WARN)
    val sqlContext = new org.apache.spark.sql.SQLContext(sparkContext)
    // Obtain an HBase connection through ZooKeeper
    val hbaseConf = HBaseConfiguration.create()
    hbaseConf.set("hbase.zookeeper.property.clientPort", "2181")
    hbaseConf.set("hbase.zookeeper.quorum", "dmp01,dmp02,dmp03,dmp04,dmp05")
    // Set the table to read
    hbaseConf.set(TableInputFormat.INPUT_TABLE, "t_prod_weixin_art")
    // Set the column family to scan
    hbaseConf.set(TableInputFormat.SCAN_COLUMNS, "info")
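    // Optionally restrict the scan to a row key range before reading (a sketch;
    // "row-000" and "row-999" are placeholder keys, not values from this table)
    // hbaseConf.set(TableInputFormat.SCAN_ROW_START, "row-000")
    // hbaseConf.set(TableInputFormat.SCAN_ROW_STOP, "row-999")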
    // Read from HBase with newAPIHadoopRDD, which returns a NewHadoopRDD
    val hbaseRDD = sparkContext.newAPIHadoopRDD(hbaseConf, classOf[TableInputFormat],
      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
      classOf[org.apache.hadoop.hbase.client.Result])
    // Keep only the Result values; map returns a MapPartitionsRDD
    val resRDD = hbaseRDD.map(tuple => tuple._2)
    // Print the first rows that were read
    resRDD.map(r => (Bytes.toString(r.getRow),
      Bytes.toString(r.getValue(Bytes.toBytes("info"), Bytes.toBytes("content")))))
      .take(10).foreach(println)
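    // A minimal sketch of turning the result RDD into a DataFrame with the
    // sqlContext created above (assumes every row has an "info:content" cell
    // holding a plain string; adjust the schema to match your table)
    import org.apache.spark.sql.Row
    import org.apache.spark.sql.types.{StringType, StructField, StructType}
    val rowRDD = resRDD.map(r => Row(
      Bytes.toString(r.getRow),
      Bytes.toString(r.getValue(Bytes.toBytes("info"), Bytes.toBytes("content")))))
    val schema = StructType(Seq(
      StructField("rowkey", StringType),
      StructField("content", StringType)))
    val articleDF = sqlContext.createDataFrame(rowRDD, schema)
    articleDF.show(10)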
    // Release Spark resources when the job is done
    sparkContext.stop()
  }
}