- Data import: HBase ships with the ImportTsv tool, which loads TSV-formatted files into an HBase table (see the sketch after this list).
- Data export/import: HBase provides Export and Import utilities, so data exported from one cluster can be imported into another (also sketched below).
- Log data can be loaded into HBase with Flume, as the walkthrough below shows.
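A minimal sketch of the first two approaches, assuming a hypothetical table `my-table` with column family `cf` and made-up HDFS paths:

```
# ImportTsv: map the TSV columns to the row key and a column, then load from HDFS
hbase org.apache.hadoop.hbase.mapreduce.ImportTsv \
  -Dimporttsv.columns=HBASE_ROW_KEY,cf:col1 my-table /input/data.tsv

# Export a table to HDFS, copy the files to the target cluster, then import them
hbase org.apache.hadoop.hbase.mapreduce.Export my-table /backup/my-table
hbase org.apache.hadoop.hbase.mapreduce.Import my-table /backup/my-table
```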
Install nginx and grant the logs directory to the hadoop user.
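A minimal sketch of this step, assuming a Debian/Ubuntu host and the default nginx log directory /var/log/nginx (adjust the package manager and paths to your environment):

```
# Install nginx (assumes an apt-based system)
sudo apt-get install -y nginx

# Let the hadoop user read the nginx logs that Flume will tail
sudo chown -R hadoop:hadoop /var/log/nginx
```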
Create the HBase table:
```
hbase shell
create 'mikeal-hbase-table','familyclom1','familyclom2'
```
Create the test-flume-into-hbase.conf file:
```
# Read live entries from a log file and store them in HBase without any processing
agent.sources = logfile-source
agent.channels = file-channel
agent.sinks = hbase-sink

# logfile-source configuration
agent.sources.logfile-source.type = exec
agent.sources.logfile-source.command = tail -f /var/log/nginx/access.log
agent.sources.logfile-source.checkperiodic = 50
# Bind the source to the channel
agent.sources.logfile-source.channels = file-channel

# Channel configuration: use a local file channel
agent.channels.file-channel.type = file
agent.channels.file-channel.checkpointDir = /opt/data/flume-hbase-test/checkpoint
agent.channels.file-channel.dataDirs = /opt/data/flume-hbase-test/data

# Sink configuration: HBaseSink with SimpleHbaseEventSerializer
agent.sinks.hbase-sink.type = org.apache.flume.sink.hbase.HBaseSink
# HBase table name
agent.sinks.hbase-sink.table = mikeal-hbase-table
# Column family of the HBase table
agent.sinks.hbase-sink.columnFamily = familyclom1
agent.sinks.hbase-sink.serializer = org.apache.flume.sink.hbase.SimpleHbaseEventSerializer
# Column (within the column family) that receives the event body
agent.sinks.hbase-sink.serializer.payloadColumn = cloumn-1
# Bind the sink to the channel
agent.sinks.hbase-sink.channel = file-channel
```
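The file channel's checkpoint and data directories must be writable by the user running Flume; Flume can usually create them itself, but pre-creating them avoids permission problems under /opt. A minimal sketch, assuming the paths from the configuration above and the hadoop user:

```
# Create the file-channel directories referenced in the configuration
sudo mkdir -p /opt/data/flume-hbase-test/checkpoint /opt/data/flume-hbase-test/data
sudo chown -R hadoop:hadoop /opt/data/flume-hbase-test
```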
Start Flume:
```
flume-ng agent --name agent --conf-file test-flume-into-hbase.conf -Dflume.root.logger=DEBUG,console

# Start in the background
nohup flume-ng agent --name agent --conf-file test-flume-into-hbase.conf -Dflume.root.logger=DEBUG,console >> flume.log 2>&1 &
```
Verify:
```
curl http://localhost
hbase shell
scan 'mikeal-hbase-table'
```