Hbase&MapReduce笔记

本文记录了如何在MapReduce任务中使用Hbase作为数据输入,并提供了调用Hbase API的示例,包括利用initScans()初始化扫描器以及FilterList的操作方式,如设置FilterList.Operator为MUST_PASS_ALL或MUST_PASS_ONE来实现多条件筛选。同时,介绍了SingleColumnValueFilter的比较条件应用。

在执行MapReduce时添加参数

	/**
	 * Reads the running date from the command-line arguments and stores it in the job configuration.
	 *
	 * The date is the token that follows a "-d" flag; when several are given, the last one wins.
	 * A blank date, or one rejected by TimeUtil.isValidateRunningDate, is replaced by
	 * TimeUtil.getYesterday().
	 *
	 * @param conf2 configuration that receives the date under GlobalConstants.RUNNING_DATE_PARAMES
	 * @param args  raw command-line arguments
	 */
	private void processArgs(Configuration conf2, String[] args) {
		String runningDate = "";
		int idx = 0;
		while (idx < args.length) {
			boolean isDateFlag = "-d".equals(args[idx]);
			boolean hasValue = idx + 1 < args.length;
			if (isDateFlag && hasValue) {
				// Consume the flag and its value together so the value is never re-read as a flag.
				runningDate = args[idx + 1];
				idx += 2;
			} else {
				idx++;
			}
		}
		boolean usable = !StringUtils.isBlank(runningDate) && TimeUtil.isValidateRunningDate(runningDate);
		conf2.set(GlobalConstants.RUNNING_DATE_PARAMES, usable ? runningDate : TimeUtil.getYesterday());
	}

Hbase当做输入端调用方法:

void org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.initTableMapperJob(List<Scan> scans, Class<? extends TableMapper> mapper, Class<? extends WritableComparable> outputKeyClass, Class<? extends Writable> outputValueClass, Job job, boolean addDependencyJars) throws IOException

样例:

// Arguments, in order: the scan list built by initScans(job), the mapper class, the mapper's
// output key class, the mapper's output value class, the job itself, and addDependencyJars (false here).
TableMapReduceUtil.initTableMapperJob(initScans(job), NewInstallUserMapper.class, StatsUserDimension.class, TimeOutputValue.class, job, false);

其中的initScans()方法

	/**
	 * Builds the scan list used as the MapReduce input for the configured running date.
	 *
	 * The scan's row range runs from the running date's timestamp (as a decimal string) to that
	 * value plus GlobalConstants.DAY_OF_MILLISECONDS. Rows are filtered to those whose event-name
	 * column equals "e_l", and only the listed columns are returned.
	 *
	 * @param job the job whose configuration holds GlobalConstants.RUNNING_DATE_PARAMES
	 * @return a single-element list containing the configured scan
	 */
	private List<Scan> initScans(Job job) {

		Configuration conf = job.getConfiguration();
		String date = conf.get(GlobalConstants.RUNNING_DATE_PARAMES);
		long time = TimeUtil.parseString2Long(date);
		long endtime = time + GlobalConstants.DAY_OF_MILLISECONDS;
		String startRow = String.valueOf(time);
		String stopRow = String.valueOf(endtime);
		Scan scan = new Scan();
		// Restrict the scan to one day of data.
		scan.setStartRow(startRow.getBytes());
		scan.setStopRow(stopRow.getBytes());

		// Keep only rows whose event name is "e_l".
		FilterList lists = new FilterList(FilterList.Operator.MUST_PASS_ALL);
		SingleColumnValueFilter filter1 = new SingleColumnValueFilter(EventLogConstants.EVENT_LOGS_FAMILY_NAME.getBytes(), EventLogConstants.LOG_COLUMN_NAME_EVENT_NAME.getBytes(), CompareOp.EQUAL, "e_l".getBytes());
		lists.addFilter(filter1);
		// Return only a subset of columns: these are the column names to fetch.
		// NOTE(review): the event-name column itself is not in this list; confirm that the
		// value filter above still gets to see that column once the column filter is active.
		String[] columns = new String[] {EventLogConstants.LOG_COLUMN_NAME_UUID,
				EventLogConstants.LOG_COLUMN_NAME_BROWSER_NAME, EventLogConstants.LOG_COLUMN_NAME_BROWSER_VERSION,
				EventLogConstants.LOG_COLUMN_NAME_SERVER_TIME, EventLogConstants.LOG_COLUMN_NAME_PLATFORM
				};
		lists.addFilter(getColumn(columns));
		// Attach the filter list to the scan; without this call neither filter is ever applied.
		scan.setFilter(lists);
		// Table name for the scan; the second argument is the name of your own table.
		scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, EventLogConstants.HBASE_NAME_EVENT_LOGS.getBytes());
		return Arrays.asList(scan);
	}


	/**
	 * Wraps the given column names in a MultipleColumnPrefixFilter.
	 *
	 * @param columns column names to pass to the filter
	 * @return a filter built from the byte form of each name
	 */
	private Filter getColumn(String[] columns) {
		// A two-dimensional array is used because MultipleColumnPrefixFilter takes a byte[][].
		byte[][] prefixes = new byte[columns.length][];
		int slot = 0;
		for (String column : columns) {
			prefixes[slot++] = column.getBytes();
		}
		return new MultipleColumnPrefixFilter(prefixes);
	}

下面是hbase的api操作的demo:

package com.hadoop.hbase;

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import com.hadoop.hbase.Phone.PhoneDetail;

public class HBaseDemo {
	String s = "phone";
	HBaseAdmin admin; //数据库层面
	HTable htable; //表层面
	
	@SuppressWarnings("deprecation")
	@Before
	public void init() throws Exception {
		Configuration conf = new Configuration();
		conf.set("hbase.zookeeper.quorum", "master.oppo.com:2181,slave1.oppo.com:2181,slave2.oppo.com:2181");
		admin = new HBaseAdmin(conf);
		htable = new HTable(conf, s.getBytes());
	}
	@After
	public void destory() throws Exception {
		if(admin!=null) {
			admin.close();
		}
	}
	
	/**
	 * 创建表
	 * @throws Exception
	 */
	@Test
	public void create() throws Exception {
		
		if(admin.tableExists(s)) {
			admin.disableTable(s);
			admin.deleteTable(s);
		}
		HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(s)); //设置表名
		HColumnDescriptor cf = new HColumnDescriptor("cf".getBytes()); //设置列族
		desc.addFamily(cf); //将列族加入表
		admin.createTable(desc); //创建表
	}
	/**
	 * 插入一条数据
	 * @throws Exception
	 */
	
	@SuppressWarnings("deprecation")
	@Test
	public void insertDB() throws Exception {
		String rowKey = "123";
		Put put = new Put(rowKey.getBytes());//创建PUT对象并传入行键
		put.add("cf".getBytes(), "name".getBytes(), "xxx123".getBytes());//添加列族
		put.add("cf".getBytes(), "age".getBytes(), "11111".getBytes());
		htable.put(put);
	}
	
	
	SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");
	
	/**
	 * 通过List一次提交多个put
	 * @throws Exception
	 */
	@SuppressWarnings("deprecation")
	@Test
	public void insertDB2() throws Exception {
		List<Put> puts = new ArrayList<Put>();
		
		for (int i = 0; i < 10; i++) {
			String phoneNum = getPhoneNum("186");
			for (int j = 0; j < 100; j++) {
				String dnum = getPhoneNum("158");
				String length = r.nextInt(99) + "";
				String type = r.nextInt(2)+"";
				String dataStr = getDate("2018");
				String rowkey = phoneNum+ "_" +(Long.MAX_VALUE - sdf.parse(dataStr).getTime());
				Put put = new Put(rowkey.getBytes());
				put.add("cf".getBytes(), "dnum".getBytes(), dnum.getBytes());
				put.add("cf".getBytes(), "length".getBytes(), length.getBytes());
				put.add("cf".getBytes(), "type".getBytes(), type.getBytes());
				put.add("cf".getBytes(), "dataStr".getBytes(), dataStr.getBytes());
				puts.add(put);
			}
		}
		htable.put(puts);
	}

	@Test
	public void getDB() throws Exception {
		String row = "123";
		Get get = new Get(row.getBytes());
		
		Result rs = htable.get(get);
		Cell cell = rs.getColumnLatestCell("cf".getBytes(), "name".getBytes());
		System.out.println(new String(CellUtil.cloneValue(cell)));
		System.out.println(new String(CellUtil.cloneFamily(cell)));
		System.out.println(new String(CellUtil.cloneQualifier(cell)));
	}
	
	@Test
	public void getDB2() throws Exception{
		Get get = new Get("18699976538_9223370509321350807".getBytes());
		Result result = htable.get(get);
		int count = 0;
		Cell cell = result.getColumnLatestCell("cf".getBytes(), "day".getBytes());
		Phone.dayPhoneDetail dayPhone = Phone.dayPhoneDetail.parseFrom(CellUtil.cloneValue(cell));
		for (PhoneDetail pd : dayPhone.getDayPhoneDetailList()) {
			System.out.println(pd.getDate() + "-" + pd.getDnum() + "-" + pd.getLength() + "-" + pd.getType());
			count++;
		}
		System.out.println(count);
	}
	/**
	 * 计数
	 * @throws Exception
	 */
	
	@Test
	public void getCount() throws Exception {
		String row = "123";
		int count = 0;
		Get get = new Get(row.getBytes());
		Result rs = htable.get(get);
		List<Cell> list = rs.listCells();
		for (Cell cell : list) {
			count++;
		}
//		Cell cell = rs.getColumnLatestCell("cf".getBytes(), "name".getBytes());
		System.out.println(count);
	}
	
	/**
	 * 根据条件筛选数据(通过设置startRow,stopRow)
	 * @throws Exception
	 */

	@Test
	public void scan() throws Exception {
		String phoneNum = "18697176576";
		String startRow = phoneNum+ "_" +(Long.MAX_VALUE - sdf.parse("20180301000000").getTime());
		String stopRow = phoneNum+ "_" +(Long.MAX_VALUE - sdf.parse("20180201000000").getTime());
		Scan scan = new Scan(); 
		scan.setStartRow(startRow.getBytes());
		scan.setStopRow(stopRow.getBytes());
		ResultScanner rss = htable.getScanner(scan);
		for (Result rs : rss) {
			System.out.print(new String(CellUtil.cloneValue(rs.getColumnLatestCell("cf".getBytes(), "dnum".getBytes()))));
			System.out.print("-" + new String(CellUtil.cloneValue(rs.getColumnLatestCell("cf".getBytes(), "length".getBytes()))));
			System.out.print("-" + new String(CellUtil.cloneValue(rs.getColumnLatestCell("cf".getBytes(), "type".getBytes()))));
			System.out.println("-" + new String(CellUtil.cloneValue(rs.getColumnLatestCell("cf".getBytes(), "dataStr".getBytes()))));
		}
	}
	
	/**
	 * 通过过滤器筛选多个条件
	 * @throws Exception
	 */
	@Test
	public void scan2() throws Exception {
		FilterList list = new FilterList(FilterList.Operator.MUST_PASS_ALL);
		PrefixFilter filter1 = new PrefixFilter("18697176576".getBytes());//前缀过滤器
		SingleColumnValueFilter filter2 = new SingleColumnValueFilter(
				  "cf".getBytes(),
				  "type".getBytes(),
				  CompareOp.EQUAL,
				  "1".getBytes()
				  );//四个参数分别为列族,列,比较条件,与其比较的值
		list.addFilter(filter1);
		list.addFilter(filter2);
		Scan scan = new Scan();
		scan.setFilter(list);
		ResultScanner rss = htable.getScanner(scan);
		for (Result rs : rss) {
			System.out.print(new String(CellUtil.cloneValue(rs.getColumnLatestCell("cf".getBytes(), "dnum".getBytes()))));
			System.out.print("-" + new String(CellUtil.cloneValue(rs.getColumnLatestCell("cf".getBytes(), "length".getBytes()))));
			System.out.print("-" + new String(CellUtil.cloneValue(rs.getColumnLatestCell("cf".getBytes(), "type".getBytes()))));
			System.out.println("-" + new String(CellUtil.cloneValue(rs.getColumnLatestCell("cf".getBytes(), "dataStr".getBytes()))));
		}
	}
	
	
	private String getDate2(String year) {	
		return year + String.format("%02d%02d%02d", new Object[] {r.nextInt(24), r.nextInt(60), r.nextInt(60)});
	}
	private String getDate(String year) {	
		return year + String.format("%02d%02d%02d%02d%02d", new Object[] {r.nextInt(12)+1, r.nextInt(31)+1, r.nextInt(24), r.nextInt(60), r.nextInt(60)});
	}
	Random r = new Random();
	private String getPhoneNum(String string) {
		return string + String.format("%08d", r.nextInt(99999999));	
	}
}

通过过滤器筛选多个条件时,FilterList有两种选项:
FilterList.Operator.MUST_PASS_ALL:所有过滤器都必须通过(相当于AND)
FilterList.Operator.MUST_PASS_ONE:至少有一个过滤器通过即可(相当于OR)

SingleColumnValueFilter 中的比较条件

条件 | 解释
LESS | less than
LESS_OR_EQUAL | less than or equal to
EQUAL | equals
NOT_EQUAL | not equal
GREATER_OR_EQUAL | greater than or equal to
GREATER | greater than
NO_OP | no operation
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值