在执行MapReduce时添加参数
/**
 * Resolves the running date from the command-line arguments and stores it in the
 * configuration under {@code GlobalConstants.RUNNING_DATE_PARAMES}.
 *
 * <p>The value following a {@code -d} flag is taken as the date; when the flag occurs
 * several times the last one wins. If no usable value is found (blank, or rejected by
 * {@code TimeUtil.isValidateRunningDate}), {@code TimeUtil.getYesterday()} is used.
 *
 * @param conf2 configuration that receives the running date
 * @param args  raw command-line arguments
 */
private void processArgs(Configuration conf2, String[] args) {
    String runningDate = "";
    int idx = 0;
    while (idx < args.length) {
        boolean hasValue = idx + 1 < args.length;
        if ("-d".equals(args[idx]) && hasValue) {
            // Consume the flag's value so it is not inspected as a flag itself.
            idx++;
            runningDate = args[idx];
        }
        idx++;
    }

    boolean usable = !StringUtils.isBlank(runningDate)
            && TimeUtil.isValidateRunningDate(runningDate);
    conf2.set(GlobalConstants.RUNNING_DATE_PARAMES,
            usable ? runningDate : TimeUtil.getYesterday());
}
将HBase当做MapReduce输入端时调用的方法:
void org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.initTableMapperJob(List<Scan> scans, Class<? extends TableMapper> mapper, Class<? extends WritableComparable> outputKeyClass, Class<? extends Writable> outputValueClass, Job job, boolean addDependencyJars) throws IOException
样例:
// Arguments, in order: the scans to read, the mapper class, the mapper's output key class,
// the mapper's output value class, the job being configured, and addDependencyJars (false here).
TableMapReduceUtil.initTableMapperJob(initScans(job), NewInstallUserMapper.class, StatsUserDimension.class, TimeOutputValue.class, job, false);
其中的initScans()方法
/**
 * Builds the scan list handed to {@code TableMapReduceUtil.initTableMapperJob}.
 *
 * <p>The single scan covers the row-key range {@code [runningDate, runningDate + 1 day)}
 * expressed as millisecond timestamps rendered to strings, keeps only rows whose
 * event-name column equals {@code "e_l"}, and restricts the returned columns to the
 * ones listed below.
 *
 * @param job job whose configuration holds {@code GlobalConstants.RUNNING_DATE_PARAMES}
 * @return a one-element list containing the configured scan
 */
private List<Scan> initScans(Job job) {
    Configuration conf = job.getConfiguration();
    String date = conf.get(GlobalConstants.RUNNING_DATE_PARAMES);
    long startTime = TimeUtil.parseString2Long(date);
    long endTime = startTime + GlobalConstants.DAY_OF_MILLISECONDS;

    Scan scan = new Scan();
    // Restrict the scan to one day of data.
    // NOTE(review): presumably row keys start with the event timestamp - confirm.
    scan.setStartRow(String.valueOf(startTime).getBytes());
    scan.setStopRow(String.valueOf(endTime).getBytes());

    FilterList filters = new FilterList(FilterList.Operator.MUST_PASS_ALL);

    // Keep only rows whose event-name column has the value "e_l".
    SingleColumnValueFilter eventFilter = new SingleColumnValueFilter(
            EventLogConstants.EVENT_LOGS_FAMILY_NAME.getBytes(),
            EventLogConstants.LOG_COLUMN_NAME_EVENT_NAME.getBytes(),
            CompareOp.EQUAL,
            "e_l".getBytes());
    filters.addFilter(eventFilter);

    // Return only the columns the mapper needs.
    String[] columns = new String[] {
        EventLogConstants.LOG_COLUMN_NAME_UUID,
        EventLogConstants.LOG_COLUMN_NAME_BROWSER_NAME,
        EventLogConstants.LOG_COLUMN_NAME_BROWSER_VERSION,
        EventLogConstants.LOG_COLUMN_NAME_SERVER_TIME,
        EventLogConstants.LOG_COLUMN_NAME_PLATFORM
    };
    filters.addFilter(getColumn(columns));

    // Attach the filters to the scan; without this call the filter list built above
    // is never applied and every row/column in the range is returned.
    scan.setFilter(filters);

    // Name of the table this scan reads from.
    scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, EventLogConstants.HBASE_NAME_EVENT_LOGS.getBytes());
    return Arrays.asList(scan);
}
/**
 * Wraps the given column names in a {@code MultipleColumnPrefixFilter}.
 *
 * @param columns column names to use as prefixes
 * @return a filter built from the byte form of every name in {@code columns}
 */
private Filter getColumn(String[] columns) {
    // MultipleColumnPrefixFilter takes its prefixes as a two-dimensional byte array.
    byte[][] prefixes = new byte[columns.length][];
    int slot = 0;
    for (String column : columns) {
        prefixes[slot++] = column.getBytes();
    }
    return new MultipleColumnPrefixFilter(prefixes);
}
下面是HBase API操作的demo:
package com.hadoop.hbase;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import com.hadoop.hbase.Phone.PhoneDetail;
/**
 * JUnit-driven demo of basic HBase client operations against the {@code phone} table:
 * creating the table, single and batched puts, gets, and scans with row ranges or filters.
 *
 * <p>{@link #init()} opens the admin and table handles before each test and
 * {@link #destory()} closes both afterwards.
 */
public class HBaseDemo {
    /** Name of the table used by every test. */
    String s = "phone";
    /** Admin handle, used for table-level operations (exists/disable/delete/create). */
    HBaseAdmin admin;
    /** Table handle, used for puts, gets and scans. */
    HTable htable;
    /** Format of the timestamps generated by {@link #getDate(String)}. */
    SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");
    /** Source of random test data. */
    Random r = new Random();

    /**
     * Opens the admin and table handles used by the tests.
     *
     * @throws Exception if either handle cannot be created
     */
    @SuppressWarnings("deprecation")
    @Before
    public void init() throws Exception {
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "master.oppo.com:2181,slave1.oppo.com:2181,slave2.oppo.com:2181");
        admin = new HBaseAdmin(conf);
        htable = new HTable(conf, s.getBytes());
    }

    /**
     * Releases the handles opened by {@link #init()}.
     *
     * @throws Exception if closing a handle fails
     */
    @After
    public void destory() throws Exception {
        try {
            // The table handle was previously never closed.
            if (htable != null) {
                htable.close();
            }
        } finally {
            // Close the admin handle even when closing the table failed.
            if (admin != null) {
                admin.close();
            }
        }
    }

    /**
     * Creates the table with a single column family {@code cf}, dropping any existing
     * table of the same name first.
     *
     * @throws Exception on any HBase failure
     */
    @Test
    public void create() throws Exception {
        if (admin.tableExists(s)) {
            admin.disableTable(s);
            admin.deleteTable(s);
        }
        HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(s)); // table name
        HColumnDescriptor cf = new HColumnDescriptor("cf".getBytes()); // column family
        desc.addFamily(cf); // attach the column family to the table
        admin.createTable(desc); // create the table
    }

    /**
     * Inserts a single row with key {@code 123}.
     *
     * @throws Exception on any HBase failure
     */
    @SuppressWarnings("deprecation")
    @Test
    public void insertDB() throws Exception {
        String rowKey = "123";
        Put put = new Put(rowKey.getBytes()); // the Put is created for the given row key
        put.add("cf".getBytes(), "name".getBytes(), "xxx123".getBytes()); // family, qualifier, value
        put.add("cf".getBytes(), "age".getBytes(), "11111".getBytes());
        htable.put(put);
    }

    /**
     * Submits many puts in one call by collecting them in a list: 10 random phone numbers
     * with 100 random call records each.
     *
     * <p>Row keys have the form {@code phoneNum_(Long.MAX_VALUE - timestamp)}.
     *
     * @throws Exception on any HBase or date-parsing failure
     */
    @SuppressWarnings("deprecation")
    @Test
    public void insertDB2() throws Exception {
        List<Put> puts = new ArrayList<Put>();
        for (int i = 0; i < 10; i++) {
            String phoneNum = getPhoneNum("186");
            for (int j = 0; j < 100; j++) {
                String dnum = getPhoneNum("158");
                String length = r.nextInt(99) + "";
                String type = r.nextInt(2) + "";
                String dataStr = getDate("2018");
                String rowkey = phoneNum + "_" + (Long.MAX_VALUE - sdf.parse(dataStr).getTime());
                Put put = new Put(rowkey.getBytes());
                put.add("cf".getBytes(), "dnum".getBytes(), dnum.getBytes());
                put.add("cf".getBytes(), "length".getBytes(), length.getBytes());
                put.add("cf".getBytes(), "type".getBytes(), type.getBytes());
                put.add("cf".getBytes(), "dataStr".getBytes(), dataStr.getBytes());
                puts.add(put);
            }
        }
        htable.put(puts);
    }

    /**
     * Reads row {@code 123} and prints the value, family and qualifier of its
     * {@code cf:name} cell.
     *
     * @throws Exception on any HBase failure
     */
    @Test
    public void getDB() throws Exception {
        String row = "123";
        Get get = new Get(row.getBytes());
        Result rs = htable.get(get);
        Cell cell = rs.getColumnLatestCell("cf".getBytes(), "name".getBytes());
        System.out.println(new String(CellUtil.cloneValue(cell)));
        System.out.println(new String(CellUtil.cloneFamily(cell)));
        System.out.println(new String(CellUtil.cloneQualifier(cell)));
    }

    /**
     * Reads the {@code cf:day} cell of a fixed row, parses it with
     * {@code Phone.dayPhoneDetail.parseFrom}, prints every contained detail and finally
     * the number of details.
     *
     * @throws Exception on any HBase or parsing failure
     */
    @Test
    public void getDB2() throws Exception {
        Get get = new Get("18699976538_9223370509321350807".getBytes());
        Result result = htable.get(get);
        int count = 0;
        Cell cell = result.getColumnLatestCell("cf".getBytes(), "day".getBytes());
        Phone.dayPhoneDetail dayPhone = Phone.dayPhoneDetail.parseFrom(CellUtil.cloneValue(cell));
        for (PhoneDetail pd : dayPhone.getDayPhoneDetailList()) {
            System.out.println(pd.getDate() + "-" + pd.getDnum() + "-" + pd.getLength() + "-" + pd.getType());
            count++;
        }
        System.out.println(count);
    }

    /**
     * Prints the number of cells stored in row {@code 123}.
     *
     * @throws Exception on any HBase failure
     */
    @Test
    public void getCount() throws Exception {
        String row = "123";
        Get get = new Get(row.getBytes());
        Result rs = htable.get(get);
        List<Cell> list = rs.listCells();
        // listCells() can return null when the result holds no cells; report 0 then.
        int count = (list == null) ? 0 : list.size();
        System.out.println(count);
    }

    /**
     * Selects rows by row-key range (start row / stop row) and prints each of them.
     *
     * <p>Because the key embeds {@code Long.MAX_VALUE - timestamp}, the later date
     * (2018-03-01) yields the smaller key and is therefore the start row.
     *
     * @throws Exception on any HBase or date-parsing failure
     */
    @Test
    public void scan() throws Exception {
        String phoneNum = "18697176576";
        String startRow = phoneNum + "_" + (Long.MAX_VALUE - sdf.parse("20180301000000").getTime());
        String stopRow = phoneNum + "_" + (Long.MAX_VALUE - sdf.parse("20180201000000").getTime());
        Scan scan = new Scan();
        scan.setStartRow(startRow.getBytes());
        scan.setStopRow(stopRow.getBytes());
        ResultScanner rss = htable.getScanner(scan);
        try {
            for (Result rs : rss) {
                printRecord(rs);
            }
        } finally {
            // Close the scanner once iteration ends or fails.
            rss.close();
        }
    }

    /**
     * Selects rows with several conditions combined in a filter list (row-key prefix
     * and {@code cf:type == "1"}) and prints each of them.
     *
     * @throws Exception on any HBase failure
     */
    @Test
    public void scan2() throws Exception {
        FilterList list = new FilterList(FilterList.Operator.MUST_PASS_ALL);
        PrefixFilter filter1 = new PrefixFilter("18697176576".getBytes()); // row-key prefix filter
        // Arguments: column family, qualifier, comparison operator, value to compare with.
        SingleColumnValueFilter filter2 = new SingleColumnValueFilter(
                "cf".getBytes(),
                "type".getBytes(),
                CompareOp.EQUAL,
                "1".getBytes());
        list.addFilter(filter1);
        list.addFilter(filter2);
        Scan scan = new Scan();
        scan.setFilter(list);
        ResultScanner rss = htable.getScanner(scan);
        try {
            for (Result rs : rss) {
                printRecord(rs);
            }
        } finally {
            // Close the scanner once iteration ends or fails.
            rss.close();
        }
    }

    /** Prints one call record as {@code dnum-length-type-dataStr} followed by a newline. */
    private void printRecord(Result rs) {
        System.out.print(columnValue(rs, "dnum"));
        System.out.print("-" + columnValue(rs, "length"));
        System.out.print("-" + columnValue(rs, "type"));
        System.out.println("-" + columnValue(rs, "dataStr"));
    }

    /** Returns the latest value of {@code cf:qualifier} in the given row as a string. */
    private String columnValue(Result rs, String qualifier) {
        return new String(CellUtil.cloneValue(rs.getColumnLatestCell("cf".getBytes(), qualifier.getBytes())));
    }

    /**
     * Appends a random {@code HHmmss} time to the given prefix.
     *
     * @param year prefix the random time is appended to
     * @return {@code year} followed by six digits
     */
    private String getDate2(String year) {
        return year + String.format("%02d%02d%02d", new Object[] {r.nextInt(24), r.nextInt(60), r.nextInt(60)});
    }

    /**
     * Appends a random {@code MMddHHmmss} timestamp to the given year.
     *
     * <p>NOTE(review): the day is drawn from 1..31 regardless of the month, so values
     * such as {@code 0231} can occur - confirm whether that is acceptable for the demo data.
     *
     * @param year four-digit year prefix
     * @return {@code year} followed by ten digits
     */
    private String getDate(String year) {
        return year + String.format("%02d%02d%02d%02d%02d", new Object[] {r.nextInt(12) + 1, r.nextInt(31) + 1, r.nextInt(24), r.nextInt(60), r.nextInt(60)});
    }

    /**
     * Builds a random phone number from the given prefix.
     *
     * @param string prefix of the number
     * @return the prefix followed by a zero-padded random number of at least eight digits
     */
    private String getPhoneNum(String string) {
        return string + String.format("%08d", r.nextInt(99999999));
    }
}
通过过滤器筛选多个条件时,FilterList有两种组合方式:
FilterList.Operator.MUST_PASS_ALL(所有过滤器都必须通过,相当于AND)
FilterList.Operator.MUST_PASS_ONE(任意一个过滤器通过即可,相当于OR)
SingleColumnValueFilter 中的比较条件
条件 | 解释 |
---|---|
LESS | less than |
LESS_OR_EQUAL | less than or equal to |
EQUAL | equals |
NOT_EQUAL | not equal |
GREATER_OR_EQUAL | greater than or equal to |
GREATER | greater than |
NO_OP | no operation |