hadoop中实现两表结合 MapReduce中map过程获取操作的文件名

当MapReduce的map阶段同时处理多个文件时,为了区分数据来自哪个文件,就需要获取该文件的文件名。
补充:在map类中重写setup方法,该方法在每个map任务开始之前执行一次。
表一order:
order001,u006
order002,u006
order003,u005
order004,u006
order005,u003
order006,u002
表二user:
u001,senge,18,male,angelababy
u002,laowang,58,male,ruhua
u003,shuaishuai,16,female,chunge
u004,laoyang,28,female,zengge
u005,nana,24,female,huangbo
u006,dingding,19,male,taojiji
实现两表结合,需要创建一个新的Bean类并实现Writable接口的序列化和反序列化方法,用来封装结合后的数据。
/**
 * Value bean for the reduce-side join. Carries either an order record
 * (oid + uid) or a user record (uid + name/age/gender/friend); the
 * {@code table} field tells the reducer which one it is.
 *
 * <p>Serialization uses fixed field order; {@link #write(DataOutput)} and
 * {@link #readFields(DataInput)} must stay in sync.
 */
public class JoinBean implements Writable {
	// Order fields (empty string when this bean carries a user record).
	private String oid;
	private String uid;
	// User fields (empty / zero when this bean carries an order record).
	private String name;
	private int age;
	private String gender;
	private String friend;
	// Marks the originating table: "order" or "user".
	private String table;

	public String getOid() {
		return oid;
	}

	public void setOid(String oid) {
		this.oid = oid;
	}

	public String getUid() {
		return uid;
	}

	public void setUid(String uid) {
		this.uid = uid;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public int getAge() {
		return age;
	}

	public void setAge(int age) {
		this.age = age;
	}

	public String getGender() {
		return gender;
	}

	public void setGender(String gender) {
		this.gender = gender;
	}

	public String getFriend() {
		return friend;
	}

	public void setFriend(String friend) {
		this.friend = friend;
	}

	public String getTable() {
		return table;
	}

	public void setTable(String table) {
		this.table = table;
	}

	@Override
	public String toString() {
		return "JoinBean [oid=" + oid + ", uid=" + uid + ", name=" + name + ", age=" + age + ", gender=" + gender
				+ ", friend=" + friend + ", table=" + table + "]";
	}

	/**
	 * Serialization — field types and order must match readFields().
	 * DataOutput.writeUTF throws NullPointerException on a null argument and
	 * the String fields default to null, so unset fields are written as "".
	 */
	@Override
	public void write(DataOutput jout) throws IOException {
		jout.writeUTF(oid == null ? "" : oid);
		jout.writeUTF(uid == null ? "" : uid);
		jout.writeUTF(name == null ? "" : name);
		jout.writeInt(age);
		jout.writeUTF(gender == null ? "" : gender);
		jout.writeUTF(friend == null ? "" : friend);
		jout.writeUTF(table == null ? "" : table);
	}

	/**
	 * Deserialization — reads fields back in exactly the order write() emitted them.
	 */
	@Override
	public void readFields(DataInput jin) throws IOException {
		oid = jin.readUTF();
		uid = jin.readUTF();
		name = jin.readUTF();
		age = jin.readInt();
		gender = jin.readUTF();
		friend = jin.readUTF();
		table = jin.readUTF();
	}

}

hadoop中实现两表结合实现代码:
map:
/**
 * Map side of the join: tags each input line with its source table
 * ("order" or "user", decided by the input file name) and emits it
 * keyed by uid so matching records meet in the same reduce call.
 */
public class JoinMapper extends Mapper<LongWritable, Text, Text, JoinBean> {

	// Name of the file this map task is reading; resolved once in setup().
	// (Renamed from `name`, which was shadowed by a local in map().)
	private String fileName = null;

	/**
	 * Runs once before any map() call; captures the split's file name so
	 * map() can tell which table each line belongs to.
	 */
	@Override
	protected void setup(Mapper<LongWritable, Text, Text, JoinBean>.Context context)
			throws IOException, InterruptedException {
		// InputSplit is abstract; file-based input formats supply a FileSplit.
		FileSplit fs = (FileSplit) context.getInputSplit();
		// If inputs sat in different directories, the full path could be used instead.
		fileName = fs.getPath().getName();
	}

	// Reused output objects (standard Hadoop pattern to avoid per-record allocation).
	Text k = new Text();
	JoinBean join = new JoinBean();

	@Override
	protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, JoinBean>.Context context)
			throws IOException, InterruptedException {
		try {
			String[] sp = value.toString().split(",");
			if (fileName != null && fileName.startsWith("order")) {
				// Order line: oid,uid. The user fields must still be set —
				// JoinBean.write() cannot serialize null Strings.
				join.setOid(sp[0]);
				join.setUid(sp[1]);
				join.setName("");
				join.setAge(0);
				join.setGender("");
				join.setFriend("");
				join.setTable("order");
				k.set(sp[1]);
			} else {
				// User line: uid,name,age,gender,friend.
				join.setUid(sp[0]);
				join.setName(sp[1]);
				join.setAge(Integer.parseInt(sp[2]));
				join.setGender(sp[3]);
				join.setFriend(sp[4]);
				join.setOid("");
				join.setTable("user");
				k.set(sp[0]);
			}
			context.write(k, join);
		} catch (Exception e) {
			// Malformed line (too few fields / non-numeric age): log and skip
			// rather than failing the whole task.
			e.printStackTrace();
		}
	}

}

reduce:
/**
 * Reduce side of the join: for each uid, separates the order records from
 * the single user record, then emits one joined line per order as
 * "oid:uid:name:age:gender:friend".
 */
public class JoinReducer extends Reducer<Text, JoinBean, Text, NullWritable> {

	// Reused output key (standard Hadoop pattern to avoid per-record allocation).
	Text k = new Text();

	@Override
	protected void reduce(Text key, Iterable<JoinBean> iters,
			Reducer<Text, JoinBean, Text, NullWritable>.Context context) throws IOException, InterruptedException {
		ArrayList<JoinBean> orderlist = new ArrayList<JoinBean>();
		JoinBean user = new JoinBean();
		for (JoinBean joinBean : iters) {
			// Hadoop reuses one JoinBean instance across the iteration, so every
			// record we keep must be copied out — adding joinBean directly would
			// leave the list full of references to the last record.
			// Copies are explicit field assignments instead of the original
			// reflective BeanUtils.copyProperties (no third-party dependency,
			// no per-record reflection cost).
			if ("order".equals(joinBean.getTable())) {
				orderlist.add(copyOf(joinBean));
			} else {
				copyInto(user, joinBean);
			}
		}
		// Emit one joined line per order for this uid. Write failures now
		// propagate instead of being swallowed by a blanket catch.
		for (JoinBean ot : orderlist) {
			String ki = ot.getOid() + ":" + ot.getUid() + ":" + user.getName() + ":" + user.getAge() + ":"
					+ user.getGender() + ":" + user.getFriend();
			k.set(ki);
			context.write(k, NullWritable.get());
		}
	}

	/** Copies every JoinBean field from src into dst. */
	private static void copyInto(JoinBean dst, JoinBean src) {
		dst.setOid(src.getOid());
		dst.setUid(src.getUid());
		dst.setName(src.getName());
		dst.setAge(src.getAge());
		dst.setGender(src.getGender());
		dst.setFriend(src.getFriend());
		dst.setTable(src.getTable());
	}

	/** Returns a fresh JoinBean holding a snapshot of src's fields. */
	private static JoinBean copyOf(JoinBean src) {
		JoinBean copy = new JoinBean();
		copyInto(copy, src);
		return copy;
	}

}

存在问题:当个别用户产生的订单较多时,这些key会集中到同一个reduce任务上,发生数据倾斜;可以考虑改用map端join(把较小的user表放入分布式缓存)来缓解。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值