Continuing from the previous example, this time the word-count results are written to HBase. First add the HBase client dependency; the shaded client bundles its third-party dependencies, which avoids classpath conflicts with Spark:
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-shaded-client</artifactId>
    <version>1.2.1</version>
</dependency>
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.util.Bytes
wordCounts.foreachRDD { rdd =>
  rdd.foreachPartition { partitionOfRecords =>
    // Build the HBase configuration and table once per partition, not once per record
    val hbaseConf = HBaseConfiguration.create()
    hbaseConf.set("hbase.zookeeper.quorum", "dcdev01,dcdev02,dcdev03")
    hbaseConf.set("hbase.zookeeper.property.clientPort", "2181")
    hbaseConf.set("hbase.defaults.for.version.skip", "true")
    val hTable: HTable = new HTable(hbaseConf, "web_log") // target table name
    hTable.setAutoFlush(false, false)          // disable auto-flush; buffer Puts client-side
    hTable.setWriteBufferSize(3 * 1024 * 1024) // 3 MB write buffer
    partitionOfRecords.foreach(record => send(hTable, record)) // send is defined below
    hTable.flushCommits() // flush the buffered Puts manually
    hTable.close()        // always release the table when the partition is done
  }
}
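Creating a new HTable per partition is simple, but it opens a fresh ZooKeeper/HBase connection on every batch of every partition. The comment in the Spark Streaming programming guide, which this example adapts, recommends a static, lazily initialized connection shared across batches instead. Below is a minimal sketch of that pattern, assuming hbase-shaded-client 1.2.1; HBaseConnectionPool is a hypothetical helper object, not part of the HBase API:

import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.{BufferedMutator, Connection, ConnectionFactory}

// Hypothetical helper: one HBase Connection per executor JVM, created lazily on first use.
// Connection is thread-safe and heavyweight; BufferedMutator instances are cheap.
object HBaseConnectionPool {
  lazy val connection: Connection = {
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "dcdev01,dcdev02,dcdev03")
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    ConnectionFactory.createConnection(conf)
  }
}

wordCounts.foreachRDD { rdd =>
  rdd.foreachPartition { partitionOfRecords =>
    // Borrow the shared connection; open a short-lived buffered writer for this partition
    val mutator: BufferedMutator =
      HBaseConnectionPool.connection.getBufferedMutator(TableName.valueOf("web_log"))
    partitionOfRecords.foreach { case (word, count) =>
      val put = new Put(Bytes.toBytes(System.currentTimeMillis()))
      put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("eventlevel"), Bytes.toBytes(word))
      put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("number"), Bytes.toBytes(count))
      mutator.mutate(put) // buffered, same idea as the manual write buffer above
    }
    mutator.close() // flushes the buffered Puts; the shared Connection stays open
  }
}

BufferedMutator batches Puts the same way setWriteBufferSize plus flushCommits does above, but without going through the deprecated HTable API.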
The send helper referenced in the first version:

def send(hTable: HTable, record: (String, Long)): Unit = {
  // Rowkey: current time in millis (see the note on rowkey design below)
  val put = new Put(Bytes.toBytes(System.currentTimeMillis()))
  // addColumn replaces the deprecated Put.add in the HBase 1.x API
  put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("eventlevel"), Bytes.toBytes(record._1))
  put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("number"), Bytes.toBytes(record._2))
  hTable.put(put) // buffered locally until flushCommits()
}
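One caveat on the rowkey: Bytes.toBytes(System.currentTimeMillis()) means two records written in the same millisecond land on the same rowkey and overwrite each other, and monotonically increasing keys push every write to the same region (a classic hotspot). A hedged alternative, assuming leading the key with the word fits the read pattern; rowKey is an illustrative helper, not part of the original example:

// Composite rowkey: word first (spreads writes across regions and avoids
// same-millisecond collisions between different words), timestamp second.
def rowKey(record: (String, Long)): Array[Byte] =
  Bytes.add(Bytes.toBytes(record._1), Bytes.toBytes(System.currentTimeMillis()))

// Usage inside send: val put = new Put(rowKey(record))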