A small example program demonstrating how to write a MapReduce job against HBase:
Data already present in HBase:
hbase(main):009:0> scan 'test'
ROW COLUMN+CELL
row1 column=li:a, timestamp=1386254343222, value=liang
row1 column=li:b, timestamp=1386253305942, value=\x00\x00\x00\x03
row1 column=li:c, timestamp=1386256955011, value=liang
row2 column=li:a, timestamp=1386255985261, value=liang
row3 column=li:a, timestamp=1386256003938, value=lei
row4 column=li:a, timestamp=1386256057937, value=lei
row5 column=li:a, timestamp=1386256064945, value=lei
row6 column=li:a, timestamp=1386256226767, value=lei
row7 column=li:a, timestamp=1386256230868, value=lei
row8 column=li:a, timestamp=1386256234817, value=lei
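For reference, a table like this can be created and populated from the HBase shell. The commands below are only a sketch of how the data shown above could have been loaded (only the first few rows are repeated here):

hbase(main):001:0> create 'test', 'li'
hbase(main):002:0> put 'test', 'row1', 'li:a', 'liang'
hbase(main):003:0> put 'test', 'row1', 'li:c', 'liang'
hbase(main):004:0> put 'test', 'row2', 'li:a', 'liang'
hbase(main):005:0> put 'test', 'row3', 'li:a', 'lei'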
The test code is as follows:
package com.hbase.create;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * This class contains the mapper and the job setup; it was written in preparation for a
 * crawler program whose goal is to store image files in HDFS.
 * @author lloppo
 */
public class MapReduce_Hbase_DownLoad extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        ToolRunner.run(new MapReduce_Hbase_DownLoad(), args);
    }

    static class MyMapper extends TableMapper<Text, LongWritable> {

        @Override
        public void map(ImmutableBytesWritable row, Result value, Context context)
                throws InterruptedException, IOException {
            // Print every cell of the current row (row key and cell value).
            for (KeyValue kv : value.raw()) {
                System.out.println("row: " + Bytes.toString(kv.getRow())
                        + " value: " + Bytes.toString(kv.getValue()));
            }
            // Read the li:a column directly; it is null if the row has no li:a cell.
            byte[] a = value.getValue(Bytes.toBytes("li"), Bytes.toBytes("a"));
            if (a != null) {
                System.out.println("value : " + Bytes.toString(a));
            }
        }
    }
    public int run(String[] args) throws Exception {
        Configuration config = HBaseConfiguration.create();
        try {
            Job job = new Job(config, "ExampleReadWrite");
            job.setJarByClass(MapReduce_Hbase_DownLoad.class);  // class that contains the mapper

            Scan scan = new Scan();
            scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
            scan.setCacheBlocks(false);  // don't fill the block cache with a one-off full scan

            TableMapReduceUtil.initTableMapperJob(
                    "test",              // input HBase table name
                    scan,                // Scan instance to control CF and attribute selection
                    MyMapper.class,      // mapper
                    null,                // mapper output key
                    null,                // mapper output value
                    job);
            job.setOutputFormatClass(NullOutputFormat.class);  // because we aren't emitting anything from the mapper

            boolean b = job.waitForCompletion(true);
            if (!b) {
                throw new IOException("error with job!");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return 0;
    }
}
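The mapper above declares Text/LongWritable output types but never emits anything, which is why the job uses NullOutputFormat. If you instead want the job to write results back into HBase, the usual pattern is to pair the mapper with a TableReducer and wire it in with TableMapReduceUtil.initTableReducerJob. The following is only a sketch, and it assumes an output table named 'test_out' with column family 'li' already exists (both names are hypothetical); it counts how often each li:a value occurs and writes the totals back:

// Sketch only. Extra imports needed:
//   import org.apache.hadoop.hbase.client.Put;
//   import org.apache.hadoop.hbase.mapreduce.TableReducer;
static class CountMapper extends TableMapper<Text, LongWritable> {
    private static final LongWritable ONE = new LongWritable(1);

    @Override
    public void map(ImmutableBytesWritable row, Result value, Context context)
            throws IOException, InterruptedException {
        byte[] a = value.getValue(Bytes.toBytes("li"), Bytes.toBytes("a"));
        if (a != null) {
            context.write(new Text(Bytes.toString(a)), ONE);  // emit (value, 1)
        }
    }
}

static class CountReducer extends TableReducer<Text, LongWritable, ImmutableBytesWritable> {
    @Override
    public void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0;
        for (LongWritable v : values) {
            sum += v.get();
        }
        // Row key = the counted value; column li:count holds the total.
        Put put = new Put(Bytes.toBytes(key.toString()));
        put.add(Bytes.toBytes("li"), Bytes.toBytes("count"), Bytes.toBytes(sum));
        context.write(null, put);  // the row key is taken from the Put itself
    }
}

In run(), the job wiring would then change to:

TableMapReduceUtil.initTableMapperJob("test", scan, CountMapper.class,
        Text.class, LongWritable.class, job);
TableMapReduceUtil.initTableReducerJob("test_out", CountReducer.class, job);
job.setNumReduceTasks(1);
// and drop job.setOutputFormatClass(NullOutputFormat.class);

Either version is submitted like any other MapReduce job; the HBase jars just have to be on the classpath, for example HADOOP_CLASSPATH=$(hbase classpath) hadoop jar your-job.jar com.hbase.create.MapReduce_Hbase_DownLoad (the jar name here is a placeholder).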