Writing Spark RDDs to HBase

This article shows how to use Apache Spark to interact with HBase: creating an HBase table, writing data into it, and reading it back, with each step implemented concretely over Spark RDDs.
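
For reference, the snippets below assume roughly the following imports (an HBase 1.x-era client API is assumed; adjust to your Spark and HBase versions):

import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory, Put, Result}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext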


def main(args: Array[String]): Unit = {
  val sparkConf = new SparkConf().setAppName("HBaseTest").setMaster("local[*]")
  val sc = new SparkContext(sparkConf)

  val conf = HBaseConfiguration.create()
  // Set the ZooKeeper quorum address. You could also put hbase-site.xml on the
  // classpath, but setting it in code like this is recommended
  conf.set("hbase.zookeeper.quorum", "localhost")
  // Set the ZooKeeper client port; the default is 2181
  conf.set("hbase.zookeeper.property.clientPort", "2181")

  val tablename = "account"

  // Initialize the JobConf. TableOutputFormat must be the one from the
  // org.apache.hadoop.hbase.mapred package (the old MapReduce API)!
  // The write goes through a Hadoop-supported output format
  val jobConf = new JobConf(conf)
  // Set the output format
  jobConf.setOutputFormat(classOf[TableOutputFormat])
  // Set the output table name
  jobConf.set(TableOutputFormat.OUTPUT_TABLE, tablename)

  val indataRDD = sc.makeRDD(Array("abc123,May,15", "abc124,luj,16", "bcd121,jual,16"))

  val rdd: RDD[(ImmutableBytesWritable, Put)] = indataRDD.map(_.split(',')).map { arr =>
    /* One Put object represents one row; the row key is passed to the constructor.
     * Every value written must be converted to bytes with
     * org.apache.hadoop.hbase.util.Bytes.toBytes.
     * Put.addColumn takes three arguments: column family, qualifier, value.
     */
    val put = new Put(Bytes.toBytes(arr(0)))
    put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("name"), Bytes.toBytes(arr(1)))
    put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("age"), Bytes.toBytes(arr(2)))
    // The RDD must have type RDD[(ImmutableBytesWritable, Put)] so that
    // saveAsHadoopDataset can be called on it
    (new ImmutableBytesWritable, put)
  }

  // An equivalent RDD built via the convertRDD helper defined below;
  // only rdd above is actually written to HBase in this example
  val finalRdd = indataRDD.map { x =>
    val pp = x.split(",")
    convertRDD((pp(0), pp(1), pp(2)))
  }

  rdd.saveAsHadoopDataset(jobConf)


  sc.stop()
}
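
As an aside, the same write can also go through the new MapReduce API, using org.apache.hadoop.hbase.mapreduce.TableOutputFormat together with saveAsNewAPIHadoopDataset. A minimal sketch, reusing the rdd, conf, and tablename from above:

import org.apache.hadoop.hbase.mapreduce.{TableOutputFormat => NewTableOutputFormat}
import org.apache.hadoop.mapreduce.Job

// Sketch: new-API equivalent of rdd.saveAsHadoopDataset(jobConf)
val job = Job.getInstance(conf)
job.setOutputFormatClass(classOf[NewTableOutputFormat[ImmutableBytesWritable]])
job.getConfiguration.set(NewTableOutputFormat.OUTPUT_TABLE, tablename)
rdd.saveAsNewAPIHadoopDataset(job.getConfiguration)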

def convertRDD(triple: (String, String, String)): (ImmutableBytesWritable, Put) = {

  val p = new Put(Bytes.toBytes(triple._1))
  p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("name"), Bytes.toBytes(triple._2))
  p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("age"), Bytes.toBytes(triple._3))
  (new ImmutableBytesWritable, p)

}

def createTable(tableName: String, familyCols: Seq[String]) = {

  val conn = getConnection()
  val admin = conn.getAdmin
  val tbName = TableName.valueOf(tableName)
  if (!admin.tableExists(tbName)) {
    val hdp = new HTableDescriptor(tbName)
    familyCols.foreach(x => hdp.addFamily(new HColumnDescriptor(x.getBytes)))
    // Only create the table when it does not already exist; calling
    // createTable on an existing table throws TableExistsException
    admin.createTable(hdp)
  }
  conn.close()
}

def getConnection(): Connection = {

  val conf = HBaseConfiguration.create()
  conf.set("hbase.zookeeper.property.clientPort", "2181")
  conf.set("hbase.zookeeper.quorum", "localhost")
  conf.set("hbase.master", "127.0.0.1:60000")
  // Creating a Connection is heavyweight work; the object is thread-safe
  // and is the entry point for all HBase operations
  val conn = ConnectionFactory.createConnection(conf)
  conn
}
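
Because Connection creation is heavyweight and the object is thread-safe, as the comment above notes, a natural pattern (not shown in the original) is to create one Connection lazily and share it rather than opening a new one per helper call:

// Sketch: a single, lazily created Connection shared across helpers
object HBaseConn {
  lazy val conn: Connection = getConnection()
}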
def dropTable(tableName: String): Unit = {
  val conn = getConnection()
  val admin = conn.getAdmin
  val tbName = TableName.valueOf(tableName)
  if (admin.tableExists(tbName)) {
    // A table must be disabled before it can be deleted
    admin.disableTable(tbName)
    admin.deleteTable(tbName)
  }
  conn.close()
}

def addRow2Table(tableName: String, rowkey: String, familyCol: String, qualifier: String, colValue: String) = {
  val tbName = TableName.valueOf(tableName)
  val conn = getConnection()
  val table = conn.getTable(tbName)
  val put = new Put(rowkey.getBytes)
  put.addColumn(familyCol.getBytes, qualifier.getBytes, colValue.getBytes)
  table.put(put)
  // Release the Table and the Connection once the put completes
  table.close()
  conn.close()
}
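
The read example below calls a helper Hbase2RDD that the original article does not show. A minimal sketch, assuming it wraps sc.newAPIHadoopRDD over TableInputFormat (the implicit SparkContext parameter matches the implicit val sc declared in the main below):

import org.apache.hadoop.hbase.mapreduce.TableInputFormat

def Hbase2RDD(tableName: String)(implicit sc: SparkContext): RDD[(ImmutableBytesWritable, Result)] = {
  val conf = HBaseConfiguration.create()
  conf.set("hbase.zookeeper.quorum", "localhost")
  conf.set("hbase.zookeeper.property.clientPort", "2181")
  // Tell TableInputFormat which table to scan
  conf.set(TableInputFormat.INPUT_TABLE, tableName)
  sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
    classOf[ImmutableBytesWritable], classOf[Result])
}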
def main(args: Array[String]): Unit = {

  val conf = new SparkConf().setAppName("HBaseBasics").setMaster("local[*]")
  implicit val sc = new SparkContext(conf)
  // sqlcontext and its implicits are only needed for the DataFrame sketch
  // at the end of the article
  val sqlcontext = new SQLContext(sc)
  import sqlcontext.implicits._
  // dropTable("account")
  // createTable("account", Seq("cf"))
  val tableRDD = Hbase2RDD("account").cache()

  println("allData: " + tableRDD.count())
  tableRDD.foreach { case (_, result) =>
    // Print the row key, then every cell in the row
    println("rowKey: " + Bytes.toString(result.getRow))
    result.rawCells().foreach { cell =>
      val colFamily = Bytes.toString(CellUtil.cloneFamily(cell))
      val colQualifier = Bytes.toString(CellUtil.cloneQualifier(cell))
      val colValue = Bytes.toString(CellUtil.cloneValue(cell))
      println("colFamily: " + colFamily + "  colQualifier: " + colQualifier + "  colValue: " + colValue)
    }
    // Alternatively, read individual columns directly:
    // val name = Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("name")))
    // val age  = Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("age")))
  }

  sc.stop()
}
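The SQLContext and import sqlcontext.implicits._ in the read example are never used in the listing as given; presumably they were meant for turning the scan results into a DataFrame. A sketch under that assumption, to be placed inside main after the foreach (the Account case class is hypothetical and must be defined at top level, outside main, so that toDF() can derive its schema):

// Hypothetical row type for the account table
case class Account(rowKey: String, name: String, age: String)

val accountDF = tableRDD.map { case (_, result) =>
  Account(
    Bytes.toString(result.getRow),
    Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("name"))),
    Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("age"))))
}.toDF()

accountDF.show()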