Loading data from HDFS into HBase with MapReduce

This post works through a concrete example of loading data into HBase with a MapReduce job: a custom Mapper and Reducer convert raw log records into HBase Puts, and a table named phone_log is created to hold them. The walkthrough covers configuration, table design, and the actual writes.
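Before the full listing, it is worth isolating the row-key scheme: the mapper keys each record by phone number plus a reformatted timestamp, so all records for one msisdn sort together chronologically. A minimal sketch of that derivation (sample values taken from the commented example line in the mapper below):

SimpleDateFormat fmt = new SimpleDateFormat("yyyyMMddHHmmss");
String reportTime = "1363157985066";  // field 0: epoch milliseconds
String msisdn = "13726230503";        // field 1: phone number
String rowKey = msisdn + ":" + fmt.format(new Date(Long.parseLong(reportTime)));
// => "13726230503:" + a yyyyMMddHHmmss timestamp; the msisdn prefix keeps
//    one phone's rows contiguous in the table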


package hbase;  


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;  
import org.apache.hadoop.hbase.HBaseConfiguration;  
import org.apache.hadoop.hbase.HColumnDescriptor;  
import org.apache.hadoop.hbase.HTableDescriptor;  
import org.apache.hadoop.hbase.TableName;  
import org.apache.hadoop.hbase.client.HBaseAdmin;  
import org.apache.hadoop.hbase.client.Mutation;  
import org.apache.hadoop.hbase.client.Put;  
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;  
import org.apache.hadoop.hbase.mapreduce.TableReducer;  
import org.apache.hadoop.io.LongWritable;  
import org.apache.hadoop.io.NullWritable;  
import org.apache.hadoop.io.Text;  
import org.apache.hadoop.mapreduce.Job;  
import org.apache.hadoop.mapreduce.Mapper;  
import org.apache.hadoop.mapreduce.Reducer;  
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;  


import java.io.IOException;  
import java.text.SimpleDateFormat;  
import java.util.Date;  


/** 
 * Created by Administrator on 2017/3/7. 
 */  
public class LoadData extends Configured {  
    public static class LoadDataMapper extends Mapper<LongWritable,Text,LongWritable,Text>{  
        private Text out = new Text();  
        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyyMMddHHmmss");  

        @Override  
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {  
            //1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC  120.196.100.82  i02.c.aliimg.com        24  27  2481    24681   200
            String line = value.toString();
            String[] splited = line.split("\t");
            // field 0 is an epoch-millisecond timestamp; reformat it and prepend
            // a row key of the form msisdn:yyyyMMddHHmmss to the original line
            String formatedDate = simpleDateFormat.format(new Date(Long.parseLong(splited[0].trim())));
            String rowKeyString = splited[1] + ":" + formatedDate;
            out.set(rowKeyString + "\t" + line);
            //13726230503:201706291728  1363157985066   13726230503 00-FD-07-A4-72-B8:CMCC  120.196.100.82  i02.c.aliimg.com        24  27  2481    24681   200
            context.write(key,out);  
        }  
    }  
    public static class LoadDataReducer extends TableReducer<LongWritable,Text,NullWritable>{  
        public static final String COLUMN_FAMILY = "cf";  
        @Override  
        protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {

            for (Text tx : values) {  
                String[] splited = tx.toString().split("\t");  
                String rowkey = splited[0];  


                Put put = new Put(rowkey.getBytes());
                // one column per tab-separated field; this assumes every input line
                // carries 11 fields after the row key the mapper prepended (the
                // sample line in the mapper comment shows only 10, likely
                // truncated), otherwise splited[11] below throws
                put.add(COLUMN_FAMILY.getBytes(), "reportTime".getBytes(),
                        splited[1].getBytes());
                put.add(COLUMN_FAMILY.getBytes(), "msisdn".getBytes(),  
                        splited[2].getBytes());  
                put.add(COLUMN_FAMILY.getBytes(), "apmac".getBytes(),  
                        splited[3].getBytes());  
                put.add(COLUMN_FAMILY.getBytes(), "acmac".getBytes(),  
                        splited[4].getBytes());  
                put.add(COLUMN_FAMILY.getBytes(), "host".getBytes(),  
                        splited[5].getBytes());  
                put.add(COLUMN_FAMILY.getBytes(), "siteType".getBytes(),  
                        splited[6].getBytes());  
                put.add(COLUMN_FAMILY.getBytes(), "upPackNum".getBytes(),  
                        splited[7].getBytes());  
                put.add(COLUMN_FAMILY.getBytes(), "downPackNum".getBytes(),  
                        splited[8].getBytes());  
                put.add(COLUMN_FAMILY.getBytes(), "upPayLoad".getBytes(),  
                        splited[9].getBytes());  
                put.add(COLUMN_FAMILY.getBytes(), "downPayLoad".getBytes(),  
                        splited[10].getBytes());  
                put.add(COLUMN_FAMILY.getBytes(), "httpStatus".getBytes(),  
                        splited[11].getBytes());  
                context.write(NullWritable.get(), put);  
            }  
        }  
    }  
    public static void createHBaseTable(String tableName) throws IOException {  

        HTableDescriptor htd = new HTableDescriptor(  
                TableName.valueOf(tableName));  
        HColumnDescriptor col = new HColumnDescriptor("cf");  
        htd.addFamily(col);  
        Configuration conf = HBaseConfiguration.create();  
        conf.set("hbase.zookeeper.quorum", "hive01");  
        HBaseAdmin admin = new HBaseAdmin(conf);
        if (admin.tableExists(tableName)) {
            System.out.println("table exists, recreating it......");
            admin.disableTable(tableName);
            admin.deleteTable(tableName);
        }
        System.out.println("creating new table: " + tableName);
        admin.createTable(htd);
        admin.close();  // release the connection once the table is ready

    }  
    public static void main(String[] args) throws Exception {  
        // input path hardcoded for testing; remove this line to take it from the CLI
        args = new String[] { "hdfs://hive01:8020/input/hbase" };
        Configuration conf = HBaseConfiguration.create();  
        // conf.set("hbase.rootdir", "hdfs://bigdata:8020/hbase");
        conf.set("hbase.zookeeper.quorum", "hive01"); 

        conf.set(TableOutputFormat.OUTPUT_TABLE, "phone_log"); 

        createHBaseTable("phone_log"); 

        Job job = Job.getInstance(conf, "LoadData");  
        job.setJarByClass(LoadData.class);  
        job.setNumReduceTasks(1);  


        // 3.2 map class  
        job.setMapperClass(LoadDataMapper.class);  
        job.setMapOutputKeyClass(LongWritable.class);  
        job.setMapOutputValueClass(Text.class);  


        // 3.3 reduce class  
        job.setReducerClass(LoadDataReducer.class);  
        // job.setOutputKeyClass(NullWritable.class);   // not required for TableOutputFormat
        // job.setOutputValueClass(Mutation.class);     // not required for TableOutputFormat



        Path inPath = new Path(args[0]);  
        FileInputFormat.addInputPath(job, inPath);  

        job.setOutputFormatClass(TableOutputFormat.class);  


        System.exit(job.waitForCompletion(true) ? 0 : 1);  

    }  
}  
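A side note on the wiring: the main() above sets TableOutputFormat.OUTPUT_TABLE and the output format class by hand. HBase also ships a helper that performs the same setup (and adds the HBase jars and ZooKeeper config to the job) in a single call; an equivalent sketch, assuming the same class names as the listing:

import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;

// replaces conf.set(TableOutputFormat.OUTPUT_TABLE, ...) and
// job.setOutputFormatClass(TableOutputFormat.class)
TableMapReduceUtil.initTableReducerJob("phone_log", LoadDataReducer.class, job);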

Result: viewing the loaded data in HBase
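Submit the job with hadoop jar (the HBase client jars must be on the classpath); once it finishes, scan 'phone_log' in the HBase shell lists the written rows. Alternatively, a minimal client-side check using the same pre-1.0-style API as the job above (hive01 quorum assumed):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class ScanPhoneLog {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hive01");
        HTable table = new HTable(conf, "phone_log");  // same old-style client API as the job
        ResultScanner scanner = table.getScanner(new Scan());
        for (Result r : scanner) {
            // print the row key and one sample column per row
            System.out.println(Bytes.toString(r.getRow()) + "  host="
                    + Bytes.toString(r.getValue(Bytes.toBytes("cf"), Bytes.toBytes("host"))));
        }
        scanner.close();
        table.close();
    }
}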

