MrType2 test data (job1 input, one "name:count" record per line):
张三:11
张三:12
李四:10
王五:10
李四:11
王五:11
MrType3 test data (chained job input, "item count" separated by a space):
shoe 121
t-shirt 20
basketball 1200
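Expected MrType2 final output, worked by hand from the data above (job1 sums the
counts per name, job2 re-sorts the sums in descending order; the relative order
of the tied 21s is not guaranteed):
张三	23
李四	21
王五	21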
package com.learn.mr;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.chain.ChainMapper;
import org.apache.hadoop.mapreduce.lib.chain.ChainReducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Three ways to compose MapReduce jobs: iterative, dependency-driven, and chained.
 */
public class MrType extends Configured implements Tool {
/**
 * 1. Iterative MapReduce
 * Some tasks are too complex to finish in a single MapReduce pass and need
 * several passes; PageRank and k-means, for example, both iterate many times,
 * and iterative MapReduce is used heavily in Mahout. The idea is simple and
 * works like a for loop: the output of one MapReduce job becomes the input of
 * the next, and the intermediate results can be deleted once the whole task is
 * done. run() below chains two concrete passes; a generic loop is sketched in
 * runIterations at the bottom of this class.
 *
 * Drawbacks of iterating with MapReduce: every iteration has to initialize a
 * new Job and request resources again, and the data exchanged between
 * iterations must go through disk I/O.
 *
 * A common solution to both problems is Tez.
 */
public static void main(String[] args) {
try {
//ToolRunner.run(new MrType(), args);
ToolRunner.run(new MrType2(), args);
//ToolRunner.run(new MrType3(), args);
} catch (Exception e) {
e.printStackTrace();
}
}
/** Appends a "1" to every line so the data visibly grows across iterations. */
static class MyMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
value.set(value.toString() + 1); // e.g. "张三:11" becomes "张三:111"
context.write(key, value);
}
}
/** Concatenates every value seen for a key into one string. */
static class MyReducer extends Reducer<LongWritable, Text, LongWritable, Text> {
@Override
protected void reduce(LongWritable key, Iterable<Text> value, Context context)
throws IOException, InterruptedException {
Text t = new Text();
for (Text t1 : value) {
t.set(t.toString() + t1.toString());
}
context.write(key, t);
}
}
@Override
public int run(String[] args) throws Exception {
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(new URI(args[0]), conf);
Path path = new Path(args[1]);
if (fs.exists(path)) {
fs.delete(path, true);
}
Job job = Job.getInstance(conf, MrType.class.getSimpleName());
FileInputFormat.setInputPaths(job, args[0]);
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(MyMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
job.setPartitionerClass(HashPartitioner.class);
job.setNumReduceTasks(1);
// job.setCombinerClass(null);
job.setReducerClass(MyReducer.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
FileOutputFormat.setOutputPath(job, path);
job.setOutputFormatClass(TextOutputFormat.class);
if (!job.waitForCompletion(true)) {
return 1; // stop here if the first pass fails
}
// Second pass: consume the first pass's output as its input.
Job job2 = Job.getInstance(conf, MrType.class.getSimpleName() + "2");
FileInputFormat.setInputPaths(job2, args[1]);
job2.setInputFormatClass(TextInputFormat.class);
job2.setMapperClass(MyMapper.class);
job2.setMapOutputKeyClass(LongWritable.class);
job2.setMapOutputValueClass(Text.class);
job2.setPartitionerClass(HashPartitioner.class);
job2.setNumReduceTasks(1);
job2.setReducerClass(MyReducer.class);
job2.setOutputKeyClass(LongWritable.class);
job2.setOutputValueClass(Text.class);
if (fs.exists(new Path(args[2]))) {
fs.delete(new Path(args[2]), true);
}
FileOutputFormat.setOutputPath(job2, new Path(args[2]));
job2.setOutputFormatClass(TextOutputFormat.class);
return job2.waitForCompletion(true) ? 0 : 1;
}
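/**
 * A minimal sketch (not part of the original example) of driving N iterations
 * with a plain for loop. The per-iteration output scheme ("iter-0", "iter-1",
 * ...) is a hypothetical convention chosen for illustration; each pass reads
 * the previous pass's output, and the intermediate directories can be removed
 * once the chain has finished.
 */
static int runIterations(Configuration conf, String input, String outputBase, int iterations) throws Exception {
String current = input;
for (int i = 0; i < iterations; i++) {
Path out = new Path(outputBase + "/iter-" + i);
Job job = Job.getInstance(conf, "iteration-" + i);
job.setMapperClass(MyMapper.class);
job.setReducerClass(MyReducer.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
FileInputFormat.setInputPaths(job, current);
FileOutputFormat.setOutputPath(job, out);
if (!job.waitForCompletion(true)) {
return 1; // abort the chain as soon as one pass fails
}
current = out.toString(); // this pass's output feeds the next pass
}
return 0;
}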
}
/**
 * 2. Dependency-driven composition: JobControl runs a set of jobs according to
 * their declared dependencies (here job2 consumes job1's output).
 */
class MrType2 extends Configured implements Tool {
/** Splits "name:count" lines and emits (name, count). */
static class SumMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
Text k1 = new Text();
IntWritable v1 = new IntWritable();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] arrays = value.toString().split(":");
k1.set(arrays[0]);
v1.set(Integer.parseInt(arrays[1]));
context.write(k1, v1);
}
}
/** Sums the counts for each name. */
static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
IntWritable v1 = new IntWritable();
@Override
protected void reduce(Text key, Iterable<IntWritable> value, Context context)
throws IOException, InterruptedException {
int sum = 0;
for (IntWritable i : value) {
sum += i.get();
}
v1.set(sum);
context.write(key, v1);
}
}
/** Reads job1's "name&lt;TAB&gt;sum" output and emits (sum, name) so the sums become sort keys. */
static class SortMapper extends Mapper<LongWritable, Text, IntWritable, Text> {
IntWritable k1 = new IntWritable();
Text v1 = new Text();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] arrays = value.toString().split("\t"); // TextOutputFormat separates key and value with a tab, not a space
k1.set(Integer.parseInt(arrays[1]));
v1.set(arrays[0]);
context.write(k1, v1);
}
}
/** Writes (name, sum) back out, now ordered by the descending comparator. */
static class SortReducer extends Reducer<IntWritable, Text, Text, IntWritable> {
@Override
protected void reduce(IntWritable key, Iterable<Text> value, Context context)
throws IOException, InterruptedException {
for (Text t : value) {
context.write(t, key);
}
}
}
/** Inverts IntWritable's natural order so job2 sorts the sums in descending order. */
static class SortClass extends WritableComparator {
public SortClass() {
super(IntWritable.class, true); // register IntWritable and let the parent create instances to compare
}
@Override
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
return -super.compare(b1, s1, l1, b2, s2, l2); // negate to reverse the natural order
}
@Override
public int compare(Object a, Object b) {
return -super.compare(a, b);
}
}
/**
 * Dependency-driven composition: job2 is declared to depend on job1, and
 * JobControl submits both in the correct order.
 */
@Override
public int run(String[] args) throws Exception {
Job job = Job.getInstance(new Configuration(), "job1");
job.setMapperClass(SumMapper.class);
job.setReducerClass(SumReducer.class);
FileInputFormat.setInputPaths(job, args[0]);
FileSystem fs = FileSystem.get(new URI(args[0]), job.getConfiguration());
Path path = new Path(args[1]);
if (fs.exists(path)) {
fs.delete(path, true);
}
FileOutputFormat.setOutputPath(job, path);
job.setInputFormatClass(TextInputFormat.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
Job job2 = Job.getInstance(new Configuration(), "job2");
job2.setInputFormatClass(TextInputFormat.class);
job2.setMapperClass(SortMapper.class);
job2.setReducerClass(SortReducer.class);
job2.setSortComparatorClass(SortClass.class);
FileInputFormat.setInputPaths(job2, args[1]);
Path path2 = new Path(args[2]);
if (fs.exists(path2)) {
fs.delete(path2, true);
}
FileOutputFormat.setOutputPath(job2, path2);
job2.setMapOutputKeyClass(IntWritable.class);
job2.setMapOutputValueClass(Text.class);
job2.setOutputKeyClass(Text.class);
job2.setOutputValueClass(IntWritable.class);
ControlledJob controll = new ControlledJob(job.getConfiguration());
controll.setJob(job);
ControlledJob controll2 = new ControlledJob(job2.getConfiguration());
controll2.setJob(job2);
controll2.addDependingJob(controll);
JobControl jc = new JobControl("jc-test");
jc.addJob(controll);
jc.addJob(controll2);
/** JobControl implements Runnable, and its run() loops forever with no stop condition of its own, so it must be driven from a helper thread. */
/** Without the extra thread, the process would hang instead of exiting once all the jobs have finished. */
Thread jcThread = new Thread(jc);
jcThread.start();
while (true) {
// Once every job in the pool has finished, move on.
if (jc.allFinished()) {
System.out.println(jc.getSuccessfulJobList());
jc.stop();
return 0;
}
if (jc.getFailedJobList().size() > 0) {
System.out.println(jc.getFailedJobList());
jc.stop();
return 1;
}
Thread.sleep(500); // avoid busy-waiting while polling
}
}
}
/**
 * 3. Chained MapReduce
 * Execution rule: the whole job may contain only one Reducer; one or more
 * Mappers may run before it and zero or more Mappers may run after it
 * (the classic [MAP+ / REDUCE MAP*] pattern).
 */
class MrType3 extends Configured implements Tool {
/** Splits "item count" lines and keeps only items with a count below 10000. */
static class FirstMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String[] arrays = value.toString().split(" ");
int num = Integer.parseInt(arrays[1]);
if (num < 10000) {
context.write(new Text(arrays[0]), new IntWritable(num)); // reuse the parsed count instead of parsing twice
}
}
}
/** Second map stage in the chain: keeps only items with a count of at most 100. */
static class SecondMapper extends Mapper<Text, IntWritable, Text, IntWritable> {
@Override
protected void map(Text key, IntWritable value,Context context)
throws IOException, InterruptedException {
if(value.get()<=100){
context.write(key, value);
}
}
}
/** The single Reducer allowed in the chain: sums the counts per item. */
static class FirstReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
@Override
protected void reduce(Text key, Iterable<IntWritable> value,Context context) throws IOException, InterruptedException {
int sum=0;
for(IntWritable i:value){
sum+=i.get();
}
context.write(key,new IntWritable(sum));
}
}
/** Post-reduce map stage: keeps only item names of at most 8 characters. */
static class ThirdMapper extends Mapper<Text, IntWritable, Text, IntWritable> {
@Override
protected void map(Text key, IntWritable value,Context context)
throws IOException, InterruptedException {
if(key.toString().length()<=8){
context.write(key, value);
}
}
}
@Override
public int run(String[] args) throws Exception {
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(new URI(args[0]), conf);
Path path = new Path(args[1]);
if (fs.exists(path)) {
fs.delete(path, true);
}
Job job = Job.getInstance(conf, MrType3.class.getSimpleName());
FileInputFormat.setInputPaths(job, args[0]);
job.setInputFormatClass(TextInputFormat.class);
// Map-side chain: FirstMapper then SecondMapper run before the single reducer.
ChainMapper.addMapper(job, FirstMapper.class, LongWritable.class, Text.class, Text.class, IntWritable.class, conf);
ChainMapper.addMapper(job, SecondMapper.class, Text.class, IntWritable.class, Text.class, IntWritable.class, conf);
ChainReducer.setReducer(job, FirstReduce.class, Text.class, IntWritable.class, Text.class, IntWritable.class, conf);
// A mapper that runs after the reducer must be added through ChainReducer, not ChainMapper.
ChainReducer.addMapper(job, ThirdMapper.class, Text.class, IntWritable.class, Text.class, IntWritable.class, conf);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setPartitionerClass(HashPartitioner.class);
job.setNumReduceTasks(1);
FileOutputFormat.setOutputPath(job, path);
job.setOutputFormatClass(TextOutputFormat.class);
return job.waitForCompletion(true) ? 0 : 1;
}
}
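
/*
 * Hypothetical invocation (jar and host names are placeholders, not part of
 * the original example); main() currently runs MrType2, which expects three
 * paths:
 *
 *   hadoop jar mr-learn.jar com.learn.mr.MrType \
 *       hdfs://namenode:8020/in /out1 /out2
 *
 * args[0] = the raw input, args[1] = job1's output (which job2 then reads),
 * args[2] = the final output. MrType and MrType2 both use three paths;
 * MrType3 only needs args[0] and args[1].
 */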