import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory, Put, Result}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}

def main(args: Array[String]): Unit = {
val sparkConf = new SparkConf().setAppName("HBaseTest").setMaster("local[*]")
val sc = new SparkContext(sparkConf)
val conf = HBaseConfiguration.create()
// Set the ZooKeeper quorum; this could also be done by putting hbase-site.xml on the classpath,
// but setting it explicitly in code is recommended here
conf.set("hbase.zookeeper.quorum", "localhost")
// Set the ZooKeeper client port; the default is 2181
conf.set("hbase.zookeeper.property.clientPort", "2181")
val tablename = "account"
// Initialize the JobConf; TableOutputFormat must be the one from the org.apache.hadoop.hbase.mapred package!
// The write goes through Hadoop's standard OutputFormat mechanism
val jobConf = new JobConf(conf)
// Set the output format
jobConf.setOutputFormat(classOf[TableOutputFormat])
// Set the output table name
jobConf.set(TableOutputFormat.OUTPUT_TABLE, tablename)
val indataRDD = sc.makeRDD(Array("abc123,May,15", "abc124,luj,16", "bcd121,jual,16"))
val rdd: RDD[(ImmutableBytesWritable, Put)] = indataRDD.map(_.split(',')).map { arr =>
/* A Put object represents one row; the row key is passed to the constructor.
 * All stored values must be converted with org.apache.hadoop.hbase.util.Bytes.toBytes.
 * Put.addColumn takes three arguments: column family, column qualifier, value.
 */
val put = new Put(Bytes.toBytes(arr(0)))
put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("name"), Bytes.toBytes(arr(1)))
put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("age"), Bytes.toBytes(arr(2)))
// Must be an RDD[(ImmutableBytesWritable, Put)] so saveAsHadoopDataset can be called on it
(new ImmutableBytesWritable, put)
}
// Equivalent construction via the convertRDD helper below (note: it is rdd, not finalRdd, that gets saved)
val finalRdd = indataRDD.map(x => {
val pp = x.split(",")
convertRDD((pp(0), pp(1), pp(2)))
})
rdd.saveAsHadoopDataset(jobConf)
sc.stop()
}
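// Side note: the JobConf-based write above requires the old mapred TableOutputFormat.
// A minimal sketch of the same write with the new API follows; assumptions: the same "account"
// table, an RDD[(ImmutableBytesWritable, Put)] like rdd above, plus the imports
// org.apache.hadoop.mapreduce.Job and org.apache.hadoop.hbase.mapreduce.{TableOutputFormat => NewTableOutputFormat}.
def saveWithNewApi(rdd: RDD[(ImmutableBytesWritable, Put)], conf: org.apache.hadoop.conf.Configuration): Unit = {
  val job = Job.getInstance(conf)
  // The new-API TableOutputFormat reads the target table from the job configuration
  job.getConfiguration.set(NewTableOutputFormat.OUTPUT_TABLE, "account")
  job.setOutputKeyClass(classOf[ImmutableBytesWritable])
  job.setOutputValueClass(classOf[Put])
  job.setOutputFormatClass(classOf[NewTableOutputFormat[ImmutableBytesWritable]])
  // saveAsNewAPIHadoopDataset takes a plain Configuration instead of a JobConf
  rdd.saveAsNewAPIHadoopDataset(job.getConfiguration)
}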
// Helper used by finalRdd above: turn a (rowkey, name, age) triple into the pair expected by TableOutputFormat
def convertRDD(triple: (String, String, String)) = {
val p = new Put(Bytes.toBytes(triple._1))
p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("name"), Bytes.toBytes(triple._2))
p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("age"), Bytes.toBytes(triple._3))
(new ImmutableBytesWritable, p)
}
def createTable(tableName: String, familCol: Seq[String]) = {
val conn = getConnection()
val admin = conn.getAdmin
val tbName = TableName.valueOf(tableName)
// Only create the table if it does not exist yet; otherwise createTable would throw TableExistsException
if (!admin.tableExists(tbName)) {
val hdp = new HTableDescriptor(tbName)
familCol.foreach(x => hdp.addFamily(new HColumnDescriptor(x.getBytes)))
admin.createTable(hdp)
}
conn.close()
}
def getConnection(): Connection = {
val conf = HBaseConfiguration.create()
conf.set("hbase.zookeeper.property.clientPort", "2181")
conf.set("hbase.zookeeper.quorum", "localhost")
conf.set("hbase.master", "127.0.0.1:60000")
// Creating a Connection is heavyweight; it is thread-safe and is the entry point for all HBase operations
val conn = ConnectionFactory.createConnection(conf)
conn
}
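// Because a Connection is heavyweight and thread-safe, a single shared instance is usually reused
// rather than creating one per call; a minimal sketch (hypothetical holder object):
object HBaseConn {
  lazy val conn: Connection = getConnection()
}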
def dropTable(tableName: String): Unit = {
val conn = getConnection()
val admin = conn.getAdmin
val tbName = TableName.valueOf(tableName)
if (admin.tableExists(tbName)) {
admin.disableTable(tbName)
admin.deleteTable(tbName)
}
conn.close()
}
def addRow2Table(tableName: String, rowkey: String, familCol: String, qualifer: String, colvalue: String) = {
val tbName = TableName.valueOf(tableName)
val conn = getConnection()
val table = conn.getTable(tbName)
val put = new Put(rowkey.getBytes)
put.addColumn(familCol.getBytes, qualifer.getBytes, colvalue.getBytes)
table.put(put)
// Release the table and the connection when done
table.close()
conn.close()
}
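// Hbase2RDD, called in main below, is not shown in this snippet. A minimal sketch, assuming it
// simply wraps sc.newAPIHadoopRDD with the mapreduce TableInputFormat (additional imports:
// org.apache.hadoop.hbase.client.Result and org.apache.hadoop.hbase.mapreduce.TableInputFormat):
def Hbase2RDD(tableName: String)(implicit sc: SparkContext): RDD[(ImmutableBytesWritable, Result)] = {
  val conf = HBaseConfiguration.create()
  conf.set("hbase.zookeeper.quorum", "localhost")
  conf.set("hbase.zookeeper.property.clientPort", "2181")
  // Tell TableInputFormat which table to scan
  conf.set(TableInputFormat.INPUT_TABLE, tableName)
  sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
    classOf[ImmutableBytesWritable], classOf[Result])
}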
def main(args: Array[String]): Unit = {
val conf = new SparkConf().setAppName("HBaseBasicUsage").setMaster("local[*]")
implicit val sc = new SparkContext(conf)
val sqlcontext = new SQLContext(sc)
import sqlcontext.implicits._
// dropTable("account")
// createTable("account", Seq("cf"))
val tableRDD = Hbase2RDD("account").cache()
println("allData:" + tableRDD.count())
tableRDD.foreach { case (_, result) =>
// Alternative: read specific columns directly via getValue
// val rowKey = Bytes.toString(result.getRow)
// val age = Bytes.toString(result.getValue("cf".getBytes, "age".getBytes))
// val name = Bytes.toString(result.getValue("cf".getBytes, "name".getBytes))
// println("rowKey:" + rowKey + "----" + "age:" + age + "---" + "name:" + name)
val cells = result.rawCells()
println("rowKey:" + Bytes.toString(result.getRow))
cells.foreach(x => {
val colFamily = Bytes.toString(CellUtil.cloneFamily(x))
val colQualifier = Bytes.toString(CellUtil.cloneQualifier(x))
val colValue = Bytes.toString(CellUtil.cloneValue(x))
println("colFamily: " + colFamily + " colQualifier:" + colQualifier + " colValue:" + colValue)
})
}
sc.stop()
}