ReduceJoinMapper:
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;

public class ReduceJoinMapper extends Mapper<LongWritable, Text, Text, Text> {

    Text text = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // The input split tells us which file the current record came from
        FileSplit inputSplit = (FileSplit) context.getInputSplit();
        // File name of the current split (not used below; the "p" prefix check is enough for this data set)
        String name = inputSplit.getPath().getName();

        String s = value.toString();
        if (s.startsWith("p")) {
            // Product table record, e.g.:
            //   p0001,小米5,1000,2000
            //   p0002,锤子T1,1000,3000
            String[] split = s.split(",");
            // Key on the product id (first field)
            text.set(split[0]);
            context.write(text, value);
        } else {
            // Order table record, e.g.:
            //   1001,20150710,p0001,2
            //   1002,20150710,p0002,3
            //   1002,20150710,p0003,3
            String[] split = s.split(",");
            // Key on the product id (third field)
            text.set(split[2]);
            context.write(text, value);
        }
    }
}
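The mapper above already pulls the file name out of the FileSplit but ends up telling the two tables apart by the leading "p" of each record. If the product ids did not carry such a convenient prefix, the file name itself could drive that decision. Below is a minimal sketch of that variant; FileNameJoinMapper is a made-up name, and it assumes the inputs are literally called product.txt and order.txt, as in the sample data at the end of this post.

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;

public class FileNameJoinMapper extends Mapper<LongWritable, Text, Text, Text> {

    Text outKey = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Decide which table the record belongs to from the split's file name
        String fileName = ((FileSplit) context.getInputSplit()).getPath().getName();
        String[] fields = value.toString().split(",");

        if (fileName.startsWith("product")) {
            // Product record: pid is the first field
            outKey.set(fields[0]);
        } else {
            // Order record: pid is the third field
            outKey.set(fields[2]);
        }
        context.write(outKey, value);
    }
}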
ReduceJoinReducer:
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class ReduceJoinReducer extends Reducer<Text, Text, Text, Text> {

    /**
     * Product table data (pid is the first field):
     *   p0001,小米5,1000,2000
     *   p0002,锤子T1,1000,3000
     * Order table data (pid is the third field):
     *   1001,20150710,p0001,2
     *   1002,20150710,p0002,3
     *   1002,20150710,p0003,3
     *
     * Expected joined output:
     *   p0001   p0001,小米5,1000,2000    1001,20150710,p0001,2
     *   p0002   p0002,锤子T1,1000,3000   1002,20150710,p0002,3
     *   p0003                            1002,20150710,p0003,3
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        String firstPart = "";
        String secondPart = "";
        for (Text value : values) {
            if (value.toString().startsWith("p")) {
                // Product record
                firstPart = value.toString();
            } else {
                // Order record
                secondPart = value.toString();
            }
        }
        context.write(key, new Text(firstPart + "\t" + secondPart));
    }
}
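Note that secondPart is overwritten on every order record, so a pid that appears in several orders would keep only the last one. The sample data has exactly one order per product, so the simple version works here; the sketch below collects all order records instead and emits one joined line per order. MultiOrderJoinReducer is a hypothetical name, not part of the original example.

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class MultiOrderJoinReducer extends Reducer<Text, Text, Text, Text> {

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        String product = "";
        List<String> orders = new ArrayList<>();

        // Separate the single product record from its (possibly many) order records
        for (Text value : values) {
            String v = value.toString();
            if (v.startsWith("p")) {
                product = v;
            } else {
                orders.add(v);
            }
        }

        // One joined line per order, so no order is lost
        for (String order : orders) {
            context.write(key, new Text(product + "\t" + order));
        }
        // A product with no matching order still shows up once
        if (orders.isEmpty()) {
            context.write(key, new Text(product + "\t"));
        }
    }
}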
ReduceJoinMain:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class ReduceJoinMain extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(super.getConf(), "reduceJoin");

        // Input: order.txt and product.txt live in the same input directory
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path("file:///F:\\map端join\\input"));

        // Mapper emits <pid, full record> for both tables
        job.setMapperClass(ReduceJoinMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Reducer joins the product and order records that share a pid
        job.setReducerClass(ReduceJoinReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path("file:///F:\\map\\join\\reduce_join_output"));

        boolean b = job.waitForCompletion(true);
        return b ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int run = ToolRunner.run(new Configuration(), new ReduceJoinMain(), args);
        System.exit(run);
    }
}
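TextOutputFormat fails the job if the output directory already exists, so re-running the driver means removing the previous output first. A small helper along these lines could be called in run() before waitForCompletion; OutputCleaner is a made-up name here, and FileSystem.get/exists/delete are the standard Hadoop FileSystem calls.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

public class OutputCleaner {

    /** Delete the output directory left behind by a previous run, if any. */
    public static void deleteIfExists(Configuration conf, Path outputPath) throws IOException {
        FileSystem fs = FileSystem.get(outputPath.toUri(), conf);
        if (fs.exists(outputPath)) {
            // Recursive delete: the directory contains part files and _SUCCESS
            fs.delete(outputPath, true);
        }
    }
}

In run(), a call like OutputCleaner.deleteIfExists(super.getConf(), new Path("file:///F:\\map\\join\\reduce_join_output")); placed before waitForCompletion would clear the previous run's output.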
order.txt
1001,20150710,p0001,2
1002,20150710,p0002,3
1002,20150710,p0003,3
product.txt
p0001,小米5,1000,2000
p0002,锤子T1,1000,3000
Result:
p0001 p0001,小米5,1000,2000 1001,20150710,p0001,2
p0002 p0002,锤子T1,1000,3000 1002,20150710,p0002,3
p0003 1002,20150710,p0003,3