Continuing from the previous example, this time the word-count results are written to HBase. First add the HBase client dependency; the shaded client bundles its third-party dependencies, which avoids classpath conflicts with Spark:
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-shaded-client</artifactId>
    <version>1.2.1</version>
</dependency>
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.util.Bytes
wordCounts.foreachRDD { rdd =>
  rdd.foreachPartition { partitionOfRecords =>
    // Build the HBase configuration and table once per partition, not once per record
    val hbaseConf = HBaseConfiguration.create()
    hbaseConf.set("hbase.zookeeper.quorum", "dcdev01,dcdev02,dcdev03")
    hbaseConf.set("hbase.zookeeper.property.clientPort", "2181")
    hbaseConf.set("hbase.defaults.for.version.skip", "true")
    val hTable: HTable = new HTable(hbaseConf, "web_log") // target table name
    hTable.setAutoFlush(false, false)          // disable auto-flush; buffer Puts client-side
    hTable.setWriteBufferSize(3 * 1024 * 1024) // 3 MB write buffer
    partitionOfRecords.foreach(record => send(hTable, record)) // send is defined below
    hTable.flushCommits() // flush the buffered Puts manually
    hTable.close()        // always release the table when the partition is done
  }
}
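Creating a new HTable per partition is simple, but it opens a fresh ZooKeeper/HBase connection on every batch of every partition. The comment in the Spark Streaming programming guide, which this example adapts, recommends a static, lazily initialized connection shared across batches instead. Below is a minimal sketch of that pattern, assuming hbase-shaded-client 1.2.1; HBaseConnectionPool is a hypothetical helper object, not part of the HBase API:

import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.{BufferedMutator, Connection, ConnectionFactory}

// Hypothetical helper: one HBase Connection per executor JVM, created lazily on first use.
// Connection is thread-safe and heavyweight; BufferedMutator instances are cheap.
object HBaseConnectionPool {
  lazy val connection: Connection = {
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "dcdev01,dcdev02,dcdev03")
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    ConnectionFactory.createConnection(conf)
  }
}

wordCounts.foreachRDD { rdd =>
  rdd.foreachPartition { partitionOfRecords =>
    // Borrow the shared connection; open a short-lived buffered writer for this partition
    val mutator: BufferedMutator =
      HBaseConnectionPool.connection.getBufferedMutator(TableName.valueOf("web_log"))
    partitionOfRecords.foreach { case (word, count) =>
      val put = new Put(Bytes.toBytes(System.currentTimeMillis()))
      put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("eventlevel"), Bytes.toBytes(word))
      put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("number"), Bytes.toBytes(count))
      mutator.mutate(put) // buffered, same idea as the manual write buffer above
    }
    mutator.close() // flushes the buffered Puts; the shared Connection stays open
  }
}

BufferedMutator batches Puts the same way setWriteBufferSize plus flushCommits does above, but without going through the deprecated HTable API.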
The send helper referenced in the first version:

def send(hTable: HTable, record: (String, Long)): Unit = {
  // Rowkey: current time in millis (see the note on rowkey design below)
  val put = new Put(Bytes.toBytes(System.currentTimeMillis()))
  // addColumn replaces the deprecated Put.add in the HBase 1.x API
  put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("eventlevel"), Bytes.toBytes(record._1))
  put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("number"), Bytes.toBytes(record._2))
  hTable.put(put) // buffered locally until flushCommits()
}
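One caveat on the rowkey: Bytes.toBytes(System.currentTimeMillis()) means two records written in the same millisecond land on the same rowkey and overwrite each other, and monotonically increasing keys push every write to the same region (a classic hotspot). A hedged alternative, assuming leading the key with the word fits the read pattern; rowKey is an illustrative helper, not part of the original example:

// Composite rowkey: word first (spreads writes across regions and avoids
// same-millisecond collisions between different words), timestamp second.
def rowKey(record: (String, Long)): Array[Byte] =
  Bytes.add(Bytes.toBytes(record._1), Bytes.toBytes(System.currentTimeMillis()))

// Usage inside send: val put = new Put(rowKey(record))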