Preface
The previous article covered MapJoin. ReduceJoin works much the same way; the difference is that one performs the join match on the map side while the other does it on the reduce side, and each approach has its own strengths.
I. Implementation Approach
As with MapJoin, two files are read, but here both files may be large. They are read through FileInputFormat; in the Mapper we first obtain the name of the file the current split belongs to and use it to tell order records apart from product records, then wrap each record in a bean and emit it with the product id, the field the two files share, as the key. On the reduce side the records from both files that carry the same product id arrive in the same group, so the join is completed simply by filling in the product name.
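For reference, the examples below assume two tab-separated input files shaped roughly like this (the contents are assumptions; what matters is that the column order matches the indices used in the Mapper, and that the order file's name starts with "order"):

order.txt  (order id, product id, quantity, unit price)
1001	p001	2	19.9
1002	p002	1	5.0
1003	p001	1	19.9

product.txt  (product id, product name)
p001	xiaomi
p002	huawei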
II. Code
1. OrderBean class
package com.hadoop.mapreduce.reduceJoin;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
* @author LengQing
* @date 2020/5/3 - 15:20
* Holds the merged fields of the order table and the product table
*/
public class OrderBean implements Writable {
private String o_id; // order table: order id
private String p_id; // order and product tables: product id
private double o_price; // order table: unit price
private Integer o_num; // order table: quantity
private String p_name; // product table: product name
private String flag; // marks which table the record comes from
public OrderBean() {
super();
}
public OrderBean(String o_id, String p_id, double o_price, Integer o_num, String p_name, String flag) {
this.o_id = o_id;
this.p_id = p_id;
this.o_price = o_price;
this.o_num = o_num;
this.p_name = p_name;
this.flag = flag;
}
@Override
public String toString() {
return o_id + '\t' + p_name + '\t' + o_num + '\t' + o_price;
}
public String getO_id() {
return o_id;
}
public void setO_id(String o_id) {
this.o_id = o_id;
}
public String getP_id() {
return p_id;
}
public void setP_id(String p_id) {
this.p_id = p_id;
}
public double getO_price() {
return o_price;
}
public void setO_price(double o_price) {
this.o_price = o_price;
}
public Integer getO_num() {
return o_num;
}
public void setO_num(Integer o_num) {
this.o_num = o_num;
}
public String getP_name() {
return p_name;
}
public void setP_name(String p_name) {
this.p_name = p_name;
}
public String getFlag() {
return flag;
}
public void setFlag(String flag) {
this.flag = flag;
}
@Override
public void write(DataOutput out) throws IOException {
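// Note: writeUTF throws a NullPointerException for null values, so every String field must be non-null (the Mapper below fills unused fields with "")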
out.writeUTF(o_id);
out.writeUTF(p_id);
out.writeDouble(o_price);
out.writeInt(o_num);
out.writeUTF(p_name);
out.writeUTF(flag);
}
@Override
public void readFields(DataInput in) throws IOException {
o_id = in.readUTF();
p_id = in.readUTF();
o_price = in.readDouble();
o_num = in.readInt();
p_name = in.readUTF();
flag = in.readUTF();
}
}
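To sanity-check the Writable implementation on its own, a minimal round-trip sketch could look like the following (a hypothetical snippet, not part of the job; it simply serializes a bean with write() and reads it back with readFields()):
package com.hadoop.mapreduce.reduceJoin;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
public class OrderBeanRoundTrip {
    public static void main(String[] args) throws Exception {
        OrderBean original = new OrderBean("1001", "p001", 19.9, 2, "", "order");
        // serialize via write()
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));
        // deserialize via readFields()
        OrderBean copy = new OrderBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
        System.out.println(copy); // prints "1001\t\t2\t19.9" (tab-separated, empty product name)
    }
}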
2. ReduceJoinMapper class
package com.hadoop.mapreduce.reduceJoin;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import java.io.IOException;
/**
* @author LengQing
* @date 2020/5/3 - 15:07
*/
public class ReduceJoinMapper extends Mapper<LongWritable, Text, Text, OrderBean> {
private Text outputKey = new Text();
private OrderBean orderBean = new OrderBean();
private String name;
@Override
protected void setup(Context context) throws IOException, InterruptedException {
// 1. Get the input split this mapper is processing
FileSplit fileSplit = (FileSplit) context.getInputSplit();
// 2. Get the name of the file the split belongs to
name = fileSplit.getPath().getName();
}
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] line = value.toString().split("\t");
if (name.startsWith("order")){ // wrap an order-table record into the bean
this.outputKey.set(line[1]);
orderBean.setO_id(line[0]);
orderBean.setP_id(line[1]);
orderBean.setO_num(Integer.parseInt(line[2]));
orderBean.setO_price(Double.parseDouble(line[3]));
orderBean.setP_name("");
orderBean.setFlag("order");
}else{ // wrap a product-table record into the bean
this.outputKey.set(line[0]);
orderBean.setO_id("");
orderBean.setP_id(line[0]);
orderBean.setO_price(0);
orderBean.setO_num(0);
orderBean.setP_name(line[1]);
orderBean.setFlag("product");
}
context.write(this.outputKey, orderBean);
}
}
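With the sample files sketched above, the Mapper emits one bean per input line, keyed by product id; conceptually the output of the map stage looks like this (illustrative only):
p001 -> (o_id=1001, num=2, price=19.9, name="",     flag=order)
p002 -> (o_id=1002, num=1, price=5.0,  name="",     flag=order)
p001 -> (o_id=1003, num=1, price=19.9, name="",     flag=order)
p001 -> (o_id="",   num=0, price=0.0,  name=xiaomi, flag=product)
p002 -> (o_id="",   num=0, price=0.0,  name=huawei, flag=product)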
3. ReduceJoinReduce class
package com.hadoop.mapreduce.reduceJoin;
import org.apache.commons.beanutils.BeanUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.ArrayList;
/**
* @author LengQing
* @date 2020/5/3 - 15:08
*/
public class ReduceJoinReduce extends Reducer<Text, OrderBean, Text, NullWritable> {
private Text outputKey = new Text();
@Override
protected void reduce(Text key, Iterable<OrderBean> values, Context context) throws IOException, InterruptedException {
// Bean that holds the product-table record for this key
OrderBean pdBean = new OrderBean();
int num = 0;
double price = 0.0;
for (OrderBean value : values) {
if ("order".equals(value.getFlag())){
num += value.getO_num();
price += value.getO_price();
}else{
try {
// copy the incoming product record into pdBean (Hadoop reuses the value object during iteration, so it must be copied out)
BeanUtils.copyProperties(pdBean, value);
} catch (Exception e) {
e.printStackTrace();
}
}
}
outputKey.set(pdBean.getP_name() + "\t" + num + "\t" + price);
context.write(outputKey, NullWritable.get());
}
}
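After the shuffle, each reduce() call receives all beans that share one product id: the order beans are accumulated into num and price, and the single product bean supplies the name (note that price, as the code is written, is the sum of the unit prices). For the assumed sample data the job would output:
xiaomi	3	39.8
huawei	1	5.0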
4. ReduceJoinDriver class
package com.hadoop.mapreduce.reduceJoin;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* @author LengQing
* @date 2020/5/3 - 14:29
* Requirement: aggregate the amounts in the order table by product id, and replace the product id with the product name from the product table
*/
public class ReduceJoinDriver extends Configured implements Tool {
@Override
public int run(String[] args) throws Exception {
// Instantiate the Job with the configuration injected by ToolRunner
Job job = Job.getInstance(getConf(), "reduceJoin");
job.setJarByClass(ReduceJoinDriver.class);
// 1. input stage
Path inputPath = new Path(args[0]);
FileInputFormat.setInputPaths(job, inputPath);
// 2. map stage
job.setMapperClass(ReduceJoinMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(OrderBean.class);
// 3. shuffle stage (nothing to configure; defaults are used)
// 4. reduce stage
job.setReducerClass(ReduceJoinReduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
// 5. output stage
Path outputPath = new Path(args[1]);
FileOutputFormat.setOutputPath(job, outputPath);
return job.waitForCompletion(true) ? 0 : 1;
}
public static void main(String[] args) {
try {
int status = ToolRunner.run(new ReduceJoinDriver(), args);
System.exit(status);
} catch (Exception e) {
e.printStackTrace();
}
}
}
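Finally, a typical way to submit the job (the jar name and HDFS paths are placeholders):
hadoop jar hadoop-mapreduce-demo.jar com.hadoop.mapreduce.reduceJoin.ReduceJoinDriver /input/join /output/join
The output directory must not exist beforehand, otherwise FileOutputFormat will fail the job.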