First, write an entity class that holds the fields of both tables.
package com.kgc.myhdfs001.join;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/*
Two tables: customers and orders.
The customers table has userid, username and age.
The orders table has orderid, userid, goodid and buynum.
*/
public class CustomOrder implements Writable {
    private String userid = "";
    private String username = "";
    private String age = "";
    private String orderid = "";
    private String goodid = "";
    private String buynum = "";
    // flag marks which table the record came from: "1" = orders, "0" = customers
    private String flag = "";

    // serialization: write every field in a fixed order
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(userid);
        dataOutput.writeUTF(username);
        dataOutput.writeUTF(age);
        dataOutput.writeUTF(orderid);
        dataOutput.writeUTF(goodid);
        dataOutput.writeUTF(buynum);
        dataOutput.writeUTF(flag);
    }

    // deserialization: read the fields back in exactly the same order they were written
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        userid = dataInput.readUTF();
        username = dataInput.readUTF();
        age = dataInput.readUTF();
        orderid = dataInput.readUTF();
        goodid = dataInput.readUTF();
        buynum = dataInput.readUTF();
        flag = dataInput.readUTF();
    }
    public CustomOrder() {
    }

    public CustomOrder(String userid, String username, String age, String orderid, String goodid, String buynum, String flag) {
        this.userid = userid;
        this.username = username;
        this.age = age;
        this.orderid = orderid;
        this.goodid = goodid;
        this.buynum = buynum;
        this.flag = flag;
    }

    @Override
    public String toString() {
        return "CustomOrder{" +
                "userid='" + userid + '\'' +
                ", username='" + username + '\'' +
                ", age='" + age + '\'' +
                ", orderid='" + orderid + '\'' +
                ", goodid='" + goodid + '\'' +
                ", buynum='" + buynum + '\'' +
                ", flag='" + flag + '\'' +
                '}';
    }

    public String getUserid() {
        return userid;
    }

    public void setUserid(String userid) {
        this.userid = userid;
    }

    public String getUsername() {
        return username;
    }

    public void setUsername(String username) {
        this.username = username;
    }

    public String getAge() {
        return age;
    }

    public void setAge(String age) {
        this.age = age;
    }

    public String getOrderid() {
        return orderid;
    }

    public void setOrderid(String orderid) {
        this.orderid = orderid;
    }

    public String getGoodid() {
        return goodid;
    }

    public void setGoodid(String goodid) {
        this.goodid = goodid;
    }

    public String getBuynum() {
        return buynum;
    }

    public void setBuynum(String buynum) {
        this.buynum = buynum;
    }

    public String getFlag() {
        return flag;
    }

    public void setFlag(String flag) {
        this.flag = flag;
    }
}
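Because write and readFields must handle the fields in exactly the same order, it can be worth checking the round trip locally before wiring the class into a job. A minimal sketch, assuming nothing beyond the class above (the helper class name RoundTripCheck and the sample values are made up for illustration):

package com.kgc.myhdfs001.join;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Hypothetical helper, not part of the job: serializes a CustomOrder to bytes and
// reads it back, mimicking what Hadoop does with the value between map and reduce.
public class RoundTripCheck {
    public static void main(String[] args) throws IOException {
        CustomOrder out = new CustomOrder("u01", "tom", "23", "o01", "g01", "2", "1");

        // serialize with write()
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        out.write(new DataOutputStream(bos));

        // deserialize with readFields(); the field order must match write()
        CustomOrder in = new CustomOrder();
        in.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));

        // should print the same field values that went in
        System.out.println(in);
    }
}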
Next, write the JoinedMapper.
package com.kgc.myhdfs001.join;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import java.io.IOException;
public class JoinedMapper extends Mapper<LongWritable, Text, Text, CustomOrder> {
    String fileName;

    // setup runs once per map task before map(): find out which input file this split comes from
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // get the split being processed by this mapper
        FileSplit fs = (FileSplit) context.getInputSplit();
        // keep the file name in a field so map() can tell orders records from customers records
        fileName = fs.getPath().getName();
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // build a CustomOrder and fill different fields depending on which file the line came from
        CustomOrder co = new CustomOrder();
        // split the comma-separated line
        String[] infos = value.toString().split(",");
        if (fileName.startsWith("order")) {
            // orders line: orderid,userid,goodid,buynum
            co.setOrderid(infos[0]);
            co.setUserid(infos[1]);
            co.setGoodid(infos[2]);
            co.setBuynum(infos[3]);
            co.setFlag("1");
        } else {
            // customers line: userid,username,age
            co.setUserid(infos[0]);
            co.setUsername(infos[1]);
            co.setAge(infos[2]);
            co.setFlag("0");
        }
        // the map output key is the userid, so records for the same user meet in one reduce call
        context.write(new Text(co.getUserid()), co);
    }
}
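For reference, the mapper assumes comma-separated lines and tells the two tables apart purely by file name: any file whose name starts with "order" is treated as orders data, everything else as customers data. With hypothetical input like the following (file names and values invented for illustration):

customers.txt  (userid,username,age)
u01,tom,23
u02,jerry,22

orders.txt  (orderid,userid,goodid,buynum)
o01,u01,g01,2
o02,u02,g05,1

the line "o01,u01,g01,2" would be emitted as key "u01" with a CustomOrder whose order fields are filled and whose flag is "1", while "u01,tom,23" would be emitted under the same key with the customer fields filled and flag "0", so both sides of the join arrive in the same reduce call.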
Then write the JoinedReducer.
package com.kgc.myhdfs001.join;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class JoinedReducer extends Reducer<Text, CustomOrder, CustomOrder, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<CustomOrder> values, Context context) throws IOException, InterruptedException {
        // all records sharing the same userid arrive here; merge them into one joined record
        CustomOrder fillCs = new CustomOrder();
        for (CustomOrder co : values) {
            if (co.getFlag().equals("0")) {
                // customers record: copy the user fields
                fillCs.setUserid(co.getUserid());
                fillCs.setUsername(co.getUsername());
                fillCs.setAge(co.getAge());
            } else {
                // orders record: copy the order fields
                fillCs.setOrderid(co.getOrderid());
                fillCs.setGoodid(co.getGoodid());
                fillCs.setBuynum(co.getBuynum());
            }
        }
        // one joined record per userid
        context.write(fillCs, NullWritable.get());
    }
}
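Note that this reducer keeps a single CustomOrder per key, so it implicitly assumes each userid appears on at most one orders line; if a user has several orders, the order fields are overwritten and only the last one survives. Under the assumption that users may have multiple orders, one possible variant (a sketch, not the original code; the class name MultiOrderJoinedReducer is made up) caches the customer fields and writes one joined record per order:

package com.kgc.myhdfs001.join;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

// Hypothetical variant: emits one joined record per order instead of
// collapsing all orders of a user into a single record.
public class MultiOrderJoinedReducer extends Reducer<Text, CustomOrder, CustomOrder, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<CustomOrder> values, Context context) throws IOException, InterruptedException {
        CustomOrder customer = new CustomOrder();
        List<CustomOrder> orders = new ArrayList<>();
        for (CustomOrder co : values) {
            if ("0".equals(co.getFlag())) {
                // customers record: remember the user fields
                customer.setUsername(co.getUsername());
                customer.setAge(co.getAge());
            } else {
                // orders record: Hadoop reuses the value object while iterating, so copy it before storing
                orders.add(new CustomOrder(co.getUserid(), "", "", co.getOrderid(), co.getGoodid(), co.getBuynum(), "1"));
            }
        }
        // write one record per order, carrying the shared customer fields
        for (CustomOrder order : orders) {
            order.setUsername(customer.getUsername());
            order.setAge(customer.getAge());
            context.write(order, NullWritable.get());
        }
    }
}

To try this variant, only job.setReducerClass(JoinedReducer.class) in the driver would need to change; the mapper and the entity class stay the same.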
Finally, write the main Job (driver) class.
package com.kgc.myhdfs001.join;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class JoinedJob {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(JoinedJob.class);

        // delete the output directory if it already exists, otherwise the job refuses to start
        Path path = new Path("file:///d:/temp02/write/");
        if (path.getFileSystem(conf).exists(path)) {
            path.getFileSystem(conf).delete(path, true);
        }
        FileInputFormat.setInputPaths(job, new Path("file:///d:/temp02/read/"));
        FileOutputFormat.setOutputPath(job, new Path("file:///d:/temp02/write/"));

        job.setMapperClass(JoinedMapper.class);
        // output types of the map phase
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(CustomOrder.class);

        // reducer of this MR job
        job.setReducerClass(JoinedReducer.class);
        // output types of the reduce phase
        job.setOutputKeyClass(CustomOrder.class);
        job.setOutputValueClass(NullWritable.class);

        job.waitForCompletion(true);
    }
}
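Since the input and output paths use the file:/// scheme, this driver is set up to run in local mode (for example straight from the IDE) against the local D: drive. With the default TextOutputFormat the reduce output key is written through CustomOrder.toString(), so each result line under d:/temp02/write/ should look roughly like the line below (values are illustrative; flag stays empty because the reducer never copies it into the merged object):

CustomOrder{userid='u01', username='tom', age='23', orderid='o01', goodid='g01', buynum='2', flag=''}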