MapReduce Composition Pattern (ControlledJob), Chain Pattern (ChainMapper, ChainReducer), and the Iterative Pattern

This article walks through three ways of composing MapReduce work: iterative processing, dependency-based job composition, and chained processing, showing with concrete examples how each pattern is configured and executed.


MrType2 test data:

张三:11
张三:12
李四:10
王五:10
李四:11
王五:11
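
(Given this input, the sum job should produce 张三→23, 李四→21, 王五→21; the sort job then re-emits the pairs in descending order of count.)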


MrType3 test data:

shoe	121
t-shirt	20
basketball	1200
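
(Tracing this input through the chain below: every row passes FirstMapper's <10000 filter, only t-shirt (20) passes SecondMapper's <=100 filter, and "t-shirt" fits ThirdMapper's 8-character limit, so the expected final output is the single line "t-shirt 20".)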


package com.learn.mr;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.chain.ChainMapper;
import org.apache.hadoop.mapreduce.lib.chain.ChainReducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;


/**
 * Composition, iteration, and chaining of MapReduce jobs.
 */
public class MrType extends Configured implements Tool {
	/**
	 * 1. Iterative MapReduce
	 * Some complex tasks cannot be finished in a single MapReduce pass and need
	 * several rounds; PageRank and K-means, for example, both require multiple
	 * iterations, and iterative MapReduce is used heavily in Mahout. The idea is
	 * simple, much like a for loop: the output of one MapReduce job becomes the
	 * input of the next, and the intermediate results can be deleted once the
	 * whole task finishes. (A generic iteration-loop sketch follows this class.)
	 *
	 * Drawbacks of iterating with MapReduce: every round must initialize a new
	 * Job and request resources again, and the data handed from one round to the
	 * next has to go through disk I/O.
	 *
	 * A common remedy for both problems is Tez.
	 */
	public static void main(String[] args) {
		try {
			// Uncomment the driver you want to run.
			//ToolRunner.run(new MrType(), args);
			ToolRunner.run(new MrType2(), args);
			//ToolRunner.run(new MrType3(), args);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/** Appends a literal "1" to each line, so repeated rounds visibly accumulate changes. */
	static class MyMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			value.set(value.toString() + 1); // string concatenation: "line" becomes "line1"
			context.write(key, value);
		}
	}

	/** Concatenates all values sharing a key into a single Text. */
	static class MyReducer extends Reducer<LongWritable, Text, LongWritable, Text> {
		@Override
		protected void reduce(LongWritable key, Iterable<Text> value, Context context)
				throws IOException, InterruptedException {
			Text t = new Text();
			for (Text t1 : value) {
				t.set(t.toString() + t1.toString());
			}
			context.write(key, t);
		}
	}

	@Override
	public int run(String[] args) throws Exception {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(new URI(args[0]), conf);
		Path path = new Path(args[1]);
		if (fs.exists(path)) {
			fs.delete(path, true);
		}

		Job job = Job.getInstance(conf, MrType.class.getSimpleName());
		FileInputFormat.setInputPaths(job, args[0]);
		job.setInputFormatClass(TextInputFormat.class);

		job.setMapperClass(MyMapper.class);
		job.setMapOutputKeyClass(LongWritable.class);
		job.setMapOutputValueClass(Text.class);

		job.setPartitionerClass(HashPartitioner.class);
		job.setNumReduceTasks(1);

		// job.setCombinerClass(null);

		job.setReducerClass(MyReducer.class);
		job.setOutputKeyClass(LongWritable.class);
		job.setOutputValueClass(Text.class);

		FileOutputFormat.setOutputPath(job, path);
		job.setOutputFormatClass(TextOutputFormat.class);

		if (!job.waitForCompletion(true)) {
			return 1; // do not start the second round if the first job failed
		}

		Job job2 = Job.getInstance(conf, MrType.class.getSimpleName() + "-2");
		FileInputFormat.setInputPaths(job2, args[1]);
		job2.setInputFormatClass(TextInputFormat.class);

		job2.setMapperClass(MyMapper.class);
		job2.setMapOutputKeyClass(LongWritable.class);
		job2.setMapOutputValueClass(Text.class);

		job2.setPartitionerClass(HashPartitioner.class);
		job2.setNumReduceTasks(1);

		job2.setReducerClass(MyReducer.class);
		job2.setOutputKeyClass(LongWritable.class);
		job2.setOutputValueClass(Text.class);

		if (fs.exists(new Path(args[2]))) {
			fs.delete(new Path(args[2]), true);
		}
		FileOutputFormat.setOutputPath(job2, new Path(args[2]));
		job2.setOutputFormatClass(TextOutputFormat.class);

		return job2.waitForCompletion(true) ? 0 : 1;
	}
}
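
The run() method above unrolls exactly two rounds by hand. A more general driver wraps job construction in a loop and feeds each round's output into the next round's input. Below is a minimal sketch of such a method (illustrative only, not from the original post; it assumes it lives inside MrType so it can reuse MyMapper, MyReducer, and the imports already present):

	static void runIterations(Configuration conf, String inputDir, String workDir, int rounds) throws Exception {
		String in = inputDir;
		for (int i = 0; i < rounds; i++) {
			String out = workDir + "/iter-" + i; // one output directory per round
			Job job = Job.getInstance(conf, "iteration-" + i);
			job.setInputFormatClass(TextInputFormat.class);
			job.setMapperClass(MyMapper.class);
			job.setMapOutputKeyClass(LongWritable.class);
			job.setMapOutputValueClass(Text.class);
			job.setReducerClass(MyReducer.class);
			job.setOutputKeyClass(LongWritable.class);
			job.setOutputValueClass(Text.class);
			FileInputFormat.setInputPaths(job, in);
			FileOutputFormat.setOutputPath(job, new Path(out));
			if (!job.waitForCompletion(true)) {
				throw new IllegalStateException("iteration " + i + " failed");
			}
			in = out; // this round's output becomes the next round's input
		}
	}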


/**
 * Dependency-based composition (ControlledJob + JobControl)
 */
class MrType2 extends Configured implements Tool {

	/** Splits "name:count" lines and emits (name, count). */
	static class SumMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
		Text k1 = new Text();
		IntWritable v1 = new IntWritable();

		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			String[] arrays = value.toString().split(":");
			k1.set(arrays[0]);
			v1.set(Integer.parseInt(arrays[1]));
			context.write(k1, v1);
		}
	}

	/** Sums the counts for each name. */
	static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
		IntWritable v1 = new IntWritable();

		@Override
		protected void reduce(Text key, Iterable<IntWritable> value, Context context)
				throws IOException, InterruptedException {
			int sum = 0;
			for (IntWritable i : value) {
				sum += i.get();
			}
			v1.set(sum);
			context.write(key, v1);
		}
	}

	/** Swaps (name, total) to (total, name) so the shuffle sorts by count. */
	static class SortMapper extends Mapper<LongWritable, Text, IntWritable, Text> {
		IntWritable k1 = new IntWritable();
		Text v1 = new Text();

		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			String arrays[] = value.toString().split("\t"); // job1's TextOutputFormat separates key and value with a tab
			k1.set(Integer.parseInt(arrays[1]));
			v1.set(arrays[0]);
			context.write(k1, v1);
		}
	}

	/** Emits (name, total) pairs back out in the comparator's (descending) key order. */
	static class SortReducer extends Reducer<IntWritable, Text, Text, IntWritable> {
		@Override
		protected void reduce(IntWritable key, Iterable<Text> value, Context context)
				throws IOException, InterruptedException {
			for (Text t : value) {
				context.write(t, key);
			}
		}
	}

	/** Comparator that negates IntWritable's natural order to sort keys descending. */
	static class SortClass extends WritableComparator {
		public SortClass() {
			super(IntWritable.class, true); // register the key class (and create instances for comparison)
		}

		@Override
		public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
			// negate the byte-level comparison to reverse the sort order
			return -super.compare(b1, s1, l1, b2, s2, l2);
		}

		@Override
		public int compare(Object a, Object b) {
			return -super.compare(a, b);
		}
	}

	/**
	 * Dependency-based composition: job2 reads job1's output, so JobControl
	 * must not start job2 until job1 has finished successfully.
	 */
	@Override
	public int run(String[] args) throws Exception {
		Job job = Job.getInstance(new Configuration(), "job1");
		job.setMapperClass(SumMapper.class);
		job.setReducerClass(SumReducer.class);
		FileInputFormat.setInputPaths(job, args[0]);
		FileSystem fs = FileSystem.get(new URI(args[0]), job.getConfiguration());
		Path path = new Path(args[1]);
		if (fs.exists(path)) {
			fs.delete(path, true);
		}
		FileOutputFormat.setOutputPath(job, path);
		job.setInputFormatClass(TextInputFormat.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);

		Job job2 = Job.getInstance(new Configuration(), "job2");
		job2.setInputFormatClass(TextInputFormat.class);
		job2.setMapperClass(SortMapper.class);
		job2.setReducerClass(SortReducer.class);
		job2.setSortComparatorClass(SortClass.class);
		FileInputFormat.setInputPaths(job2, args[1]);
		Path path2 = new Path(args[2]);
		if (fs.exists(path2)) {
			fs.delete(path2, true);
		}
		FileOutputFormat.setOutputPath(job2, path2);
		job2.setMapOutputKeyClass(IntWritable.class);
		job2.setMapOutputValueClass(Text.class);
		job2.setOutputKeyClass(Text.class);
		job2.setOutputValueClass(IntWritable.class);

		ControlledJob controll = new ControlledJob(job.getConfiguration());
		controll.setJob(job);

		ControlledJob controll2 = new ControlledJob(job2.getConfiguration());
		controll2.setJob(job2);

		controll2.addDependingJob(controll);

		JobControl jc = new JobControl("jc-test");
		jc.addJob(controll);
		jc.addJob(controll2);

		/** JobControl implements Runnable, and Runnable only has a run method with no way to signal completion, so a helper thread is needed. */
		/** If it is not run on a Thread, the program will not exit after all the Hadoop jobs have finished. */
		Thread jcThread = new Thread(jc);
		jcThread.start();
		while (true) {
			// Poll until every job in the pool has finished, then move on.
			if (jc.allFinished()) {
				System.out.println(jc.getSuccessfulJobList());
				jc.stop();
				return 0;
			}
			if (jc.getFailedJobList().size() > 0) {
				System.out.println(jc.getFailedJobList());
				jc.stop();
				return 1;
			}
			Thread.sleep(500); // avoid busy-waiting between polls
		}
	}
}
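
As an aside, the same dependency wiring can be expressed a bit more compactly with ControlledJob's two-argument constructor, which takes the job and its dependency list directly. A minimal sketch, assuming the same job and job2 objects built in run() above:

		ControlledJob first = new ControlledJob(job, null); // no dependencies
		ControlledJob second = new ControlledJob(job2, java.util.Arrays.asList(first)); // waits for 'first'
		JobControl jc = new JobControl("jc-test");
		jc.addJob(first);
		jc.addJob(second);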

/**
 * Chained MapReduce
 * Execution rule: the entire job may contain only one Reducer; one or more
 * Mappers may run before the Reducer, and zero or more Mappers may run after it.
 */
class MrType3 extends Configured implements Tool {
	
	/** Keeps only products whose count is below 10000. */
	static class FirstMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
		@Override
		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			String arrays[] = value.toString().split("\t");
			int num = Integer.parseInt(arrays[1]);
			if (num < 10000) {
				context.write(new Text(arrays[0]), new IntWritable(num));
			}
		}
	}
	/** Keeps only products whose count is at most 100. */
	static class SecondMapper extends Mapper<Text, IntWritable, Text, IntWritable> {
		@Override
		protected void map(Text key, IntWritable value,Context context)
				throws IOException, InterruptedException {
			if(value.get()<=100){
				context.write(key, value);
			}
		}
	}
	/** Sums the counts for each product. */
	static class FirstReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
		@Override
		protected void reduce(Text key, Iterable<IntWritable> value,Context context) throws IOException, InterruptedException {
			int sum=0;
			for(IntWritable i:value){
				sum+=i.get();
			}
			context.write(key,new IntWritable(sum));
		}
	}
	/** Post-reduce mapper: keeps only product names of at most 8 characters. */
	static class ThirdMapper extends Mapper<Text, IntWritable, Text, IntWritable> {
		@Override
		protected void map(Text key, IntWritable value,Context context)
				throws IOException, InterruptedException {
			if(key.toString().length()<=8){
				context.write(key, value);
			}
		}
	}
	
	@Override
	public int run(String[] args) throws Exception {
		Configuration conf=new Configuration();
		FileSystem fs=FileSystem.get(new URI(args[0]),conf);
		Path path=new Path(args[1]);
		if(fs.exists(path)){
			fs.delete(path,true);
		}
		Job job = Job.getInstance(conf, MrType3.class.getSimpleName());
		FileInputFormat.setInputPaths(job,args[0]);
		job.setInputFormatClass(TextInputFormat.class);
		
		// Pre-reduce mapper chain: FirstMapper -> SecondMapper.
		ChainMapper.addMapper(job, FirstMapper.class, LongWritable.class, Text.class, Text.class, IntWritable.class, conf);
		ChainMapper.addMapper(job, SecondMapper.class, Text.class, IntWritable.class, Text.class, IntWritable.class, conf);
		// The single Reducer, followed by a post-reduce mapper. Mappers that run after
		// the Reducer must be added with ChainReducer.addMapper, not ChainMapper.addMapper
		// (the latter would place ThirdMapper in the map phase, before the Reducer).
		ChainReducer.setReducer(job, FirstReduce.class, Text.class, IntWritable.class, Text.class, IntWritable.class, conf);
		ChainReducer.addMapper(job, ThirdMapper.class, Text.class, IntWritable.class, Text.class, IntWritable.class, conf);
		
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		
		job.setPartitionerClass(HashPartitioner.class);
		job.setNumReduceTasks(1);
		
		FileOutputFormat.setOutputPath(job,path);
		job.setOutputFormatClass(TextOutputFormat.class);
		
		return job.waitForCompletion(true)?0:1;
	}
}
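
All three drivers are launched in the usual way with hadoop jar: args[0] is the input directory, args[1] the (first) output directory, and args[2] the second output directory for MrType and MrType2. Switch the ToolRunner.run(...) line in main() to select which driver runs.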

