MainClass
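The driver class: it takes the input and output paths from the command line, configures the job (entry class, Mapper, Reducer, map output types), and submits it to the cluster.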
package com.bjsxt.mr.wordcount;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MainClass {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args == null || args.length != 2) {
            System.out.println("Usage: yarn jar wordcount.jar com.bjsxt.mr.wordcount.MainClass <input path> <output path>");
            return;
        }
        // Build the configuration object and load the default property values
        Configuration conf = new Configuration(true);
        Job job = Job.getInstance(conf);
        // Set the main entry class so Hadoop can locate the jar that contains it
        job.setJarByClass(MainClass.class);
        job.setJobName("my wordcount");
        // Set the input path
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // Set the job's output path; it must not already exist, otherwise the job fails
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Set the Mapper class
        job.setMapperClass(MyMapper.class);
        // Set the Reducer class
        job.setReducerClass(MyReducer.class);
        // Set the map output key type, used for sorting and grouping during the shuffle
        job.setMapOutputKeyClass(Text.class);
        // Set the map output value type
        job.setMapOutputValueClass(LongWritable.class);
        // Submit the job and wait for it to complete
        job.waitForCompletion(true);
    }
}
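With the three classes packaged into wordcount.jar, the job is submitted as the usage message suggests; the paths below are purely illustrative:

yarn jar wordcount.jar com.bjsxt.mr.wordcount.MainClass /data/input /data/output

Remember that the second argument names the output directory, which must not exist before the run.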
MyMapper
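The map side: with the default TextInputFormat, the key is the byte offset of the line in the file (LongWritable) and the value is the line itself (Text). The mapper splits each line on spaces and emits each word as the key with a count of 1.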
package com.bjsxt.mr.wordcount;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    // Reusable output objects; every word is emitted with a count of 1
    private LongWritable valueOut = new LongWritable(1L);
    private Text outKey = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // The value is one line of input, e.g. "hello bjsxt 5"
        String line = value.toString();
        // Split the line into words, e.g. {"hello", "bjsxt", "5"}
        String[] words = line.split(" ");
        for (String word : words) {
            // Emit one pair per word, e.g. <"hello", 1>
            outKey.set(word);
            context.write(outKey, valueOut);
        }
    }
}
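For the sample line "hello bjsxt 5" the mapper emits <"hello", 1>, <"bjsxt", 1> and <"5", 1>; the shuffle then groups these pairs by word before they reach the reducer. Reusing outKey and valueOut across calls is safe because the framework serializes them inside context.write, and it avoids allocating a new object per record.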
MyReducer
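The reduce side: all counts emitted for the same word arrive together as one group, and the reducer sums them into the final count for that word.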
package com.bjsxt.mr.wordcount;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
    private LongWritable outValue = new LongWritable();

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values,
            Context context) throws IOException, InterruptedException {
        // The framework groups values by key, e.g. <"hello", [1,1,1,1,1,1,1,1,1]>
        // Get the iterator and walk over the values
        Iterator<LongWritable> iterator = values.iterator();
        long sum = 0L;
        while (iterator.hasNext()) {
            LongWritable num = iterator.next();
            sum += num.get();
        }
        // Wrap the total in a LongWritable and write it to the job output (HDFS)
        outValue.set(sum);
        context.write(key, outValue);
    }
}
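With the default TextOutputFormat, each reducer writes lines of the form word&lt;TAB&gt;count into part-r-* files under the output directory passed as args[1].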