hadoop中实现两表结合 MapReduce中map过程获取操作的文件名

当MapReduce的map阶段同时处理多个文件时,为了区分数据来自哪个文件,就需要获取该文件的文件名。
补充:在map类中重写setup方法,该方法在每个map任务开始之前执行一次。
表一order:
order001,u006
order002,u006
order003,u005
order004,u006
order005,u003
order006,u002
表二user:
u001,senge,18,male,angelababy
u002,laowang,58,male,ruhua
u003,shuaishuai,16,female,chunge
u004,laoyang,28,female,zengge
u005,nana,24,female,huangbo
u006,dingding,19,male,taojiji
实现两表结合,需要创建一个新的Bean类并实现Writable接口的序列化和反序列化方法,用来封装结合后的数据。
/**
 * Value bean for the reduce-side join. Carries either an order record
 * (oid + uid) or a user record (uid + name/age/gender/friend); the
 * {@code table} field tells the reducer which one it is.
 *
 * <p>Serialization uses fixed field order; {@link #write(DataOutput)} and
 * {@link #readFields(DataInput)} must stay in sync.
 */
public class JoinBean implements Writable {
	// Order fields (empty string when this bean carries a user record).
	private String oid;
	private String uid;
	// User fields (empty / zero when this bean carries an order record).
	private String name;
	private int age;
	private String gender;
	private String friend;
	// Marks the originating table: "order" or "user".
	private String table;

	public String getOid() {
		return oid;
	}

	public void setOid(String oid) {
		this.oid = oid;
	}

	public String getUid() {
		return uid;
	}

	public void setUid(String uid) {
		this.uid = uid;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public int getAge() {
		return age;
	}

	public void setAge(int age) {
		this.age = age;
	}

	public String getGender() {
		return gender;
	}

	public void setGender(String gender) {
		this.gender = gender;
	}

	public String getFriend() {
		return friend;
	}

	public void setFriend(String friend) {
		this.friend = friend;
	}

	public String getTable() {
		return table;
	}

	public void setTable(String table) {
		this.table = table;
	}

	@Override
	public String toString() {
		return "JoinBean [oid=" + oid + ", uid=" + uid + ", name=" + name + ", age=" + age + ", gender=" + gender
				+ ", friend=" + friend + ", table=" + table + "]";
	}

	/**
	 * Serialization — field types and order must match readFields().
	 * DataOutput.writeUTF throws NullPointerException on a null argument and
	 * the String fields default to null, so unset fields are written as "".
	 */
	@Override
	public void write(DataOutput jout) throws IOException {
		jout.writeUTF(oid == null ? "" : oid);
		jout.writeUTF(uid == null ? "" : uid);
		jout.writeUTF(name == null ? "" : name);
		jout.writeInt(age);
		jout.writeUTF(gender == null ? "" : gender);
		jout.writeUTF(friend == null ? "" : friend);
		jout.writeUTF(table == null ? "" : table);
	}

	/**
	 * Deserialization — reads fields back in exactly the order write() emitted them.
	 */
	@Override
	public void readFields(DataInput jin) throws IOException {
		oid = jin.readUTF();
		uid = jin.readUTF();
		name = jin.readUTF();
		age = jin.readInt();
		gender = jin.readUTF();
		friend = jin.readUTF();
		table = jin.readUTF();
	}

}

hadoop中实现两表结合实现代码:
map:
/**
 * Map side of the join: tags each input line with its source table
 * ("order" or "user", decided by the input file name) and emits it
 * keyed by uid so matching records meet in the same reduce call.
 */
public class JoinMapper extends Mapper<LongWritable, Text, Text, JoinBean> {

	// Name of the file this map task is reading; resolved once in setup().
	// (Renamed from `name`, which was shadowed by a local in map().)
	private String fileName = null;

	/**
	 * Runs once before any map() call; captures the split's file name so
	 * map() can tell which table each line belongs to.
	 */
	@Override
	protected void setup(Mapper<LongWritable, Text, Text, JoinBean>.Context context)
			throws IOException, InterruptedException {
		// InputSplit is abstract; file-based input formats supply a FileSplit.
		FileSplit fs = (FileSplit) context.getInputSplit();
		// If inputs sat in different directories, the full path could be used instead.
		fileName = fs.getPath().getName();
	}

	// Reused output objects (standard Hadoop pattern to avoid per-record allocation).
	Text k = new Text();
	JoinBean join = new JoinBean();

	@Override
	protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, JoinBean>.Context context)
			throws IOException, InterruptedException {
		try {
			String[] sp = value.toString().split(",");
			if (fileName != null && fileName.startsWith("order")) {
				// Order line: oid,uid. The user fields must still be set —
				// JoinBean.write() cannot serialize null Strings.
				join.setOid(sp[0]);
				join.setUid(sp[1]);
				join.setName("");
				join.setAge(0);
				join.setGender("");
				join.setFriend("");
				join.setTable("order");
				k.set(sp[1]);
			} else {
				// User line: uid,name,age,gender,friend.
				join.setUid(sp[0]);
				join.setName(sp[1]);
				join.setAge(Integer.parseInt(sp[2]));
				join.setGender(sp[3]);
				join.setFriend(sp[4]);
				join.setOid("");
				join.setTable("user");
				k.set(sp[0]);
			}
			context.write(k, join);
		} catch (Exception e) {
			// Malformed line (too few fields / non-numeric age): log and skip
			// rather than failing the whole task.
			e.printStackTrace();
		}
	}

}

reduce:
/**
 * Reduce side of the join: for each uid, separates the order records from
 * the single user record, then emits one joined line per order as
 * "oid:uid:name:age:gender:friend".
 */
public class JoinReducer extends Reducer<Text, JoinBean, Text, NullWritable> {

	// Reused output key (standard Hadoop pattern to avoid per-record allocation).
	Text k = new Text();

	@Override
	protected void reduce(Text key, Iterable<JoinBean> iters,
			Reducer<Text, JoinBean, Text, NullWritable>.Context context) throws IOException, InterruptedException {
		ArrayList<JoinBean> orderlist = new ArrayList<JoinBean>();
		JoinBean user = new JoinBean();
		for (JoinBean joinBean : iters) {
			// Hadoop reuses one JoinBean instance across the iteration, so every
			// record we keep must be copied out — adding joinBean directly would
			// leave the list full of references to the last record.
			// Copies are explicit field assignments instead of the original
			// reflective BeanUtils.copyProperties (no third-party dependency,
			// no per-record reflection cost).
			if ("order".equals(joinBean.getTable())) {
				orderlist.add(copyOf(joinBean));
			} else {
				copyInto(user, joinBean);
			}
		}
		// Emit one joined line per order for this uid. Write failures now
		// propagate instead of being swallowed by a blanket catch.
		for (JoinBean ot : orderlist) {
			String ki = ot.getOid() + ":" + ot.getUid() + ":" + user.getName() + ":" + user.getAge() + ":"
					+ user.getGender() + ":" + user.getFriend();
			k.set(ki);
			context.write(k, NullWritable.get());
		}
	}

	/** Copies every JoinBean field from src into dst. */
	private static void copyInto(JoinBean dst, JoinBean src) {
		dst.setOid(src.getOid());
		dst.setUid(src.getUid());
		dst.setName(src.getName());
		dst.setAge(src.getAge());
		dst.setGender(src.getGender());
		dst.setFriend(src.getFriend());
		dst.setTable(src.getTable());
	}

	/** Returns a fresh JoinBean holding a snapshot of src's fields. */
	private static JoinBean copyOf(JoinBean src) {
		JoinBean copy = new JoinBean();
		copyInto(copy, src);
		return copy;
	}

}

存在问题:当个别用户产生的订单较多时,这些key会集中到同一个reduce任务上,发生数据倾斜;可以考虑改用map端join(把较小的user表放入分布式缓存)来缓解。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值