Step 1: Import all of the MapReduce jar packages from the Hadoop distribution into the project's build path.
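A quick way to confirm the jars really are on the build path is to compile and run a one-line check against a Hadoop class (a minimal sketch; the class name ClasspathCheck is only for illustration):

import org.apache.hadoop.util.VersionInfo;

// If the Hadoop jars are on the classpath, this compiles and prints the version.
public class ClasspathCheck {
    public static void main(String[] args) {
        System.out.println("Hadoop version: " + VersionInfo.getVersion());
    }
}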
Step 2: The WordCount Mapper
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WCMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    // Every word is emitted with a count of 1; the writable is reused across calls.
    private final IntWritable v = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // key is the byte offset of the line, value is the line itself.
        String tmp = value.toString();
        String[] arr = tmp.split(" ");
        for (String s : arr) {
            if (!"".equals(s)) { // skip empty tokens produced by repeated spaces
                Text k = new Text(s);
                context.write(k, v);
            }
        }
    }
}
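The Mapper can be checked without a cluster. The sketch below uses MRUnit's MapDriver (this assumes the mrunit and junit test dependencies are on the classpath; the test class name and sample line are made up for illustration):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.junit.Test;

public class WCMapperTest {

    @Test
    public void emitsOnePairPerWord() throws Exception {
        MapDriver<LongWritable, Text, Text, IntWritable> driver =
                MapDriver.newMapDriver(new WCMapper());
        // One input line should produce one (word, 1) pair per non-empty token.
        driver.withInput(new LongWritable(0), new Text("hello world hello"))
              .withOutput(new Text("hello"), new IntWritable(1))
              .withOutput(new Text("world"), new IntWritable(1))
              .withOutput(new Text("hello"), new IntWritable(1))
              .runTest();
    }
}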
Step 3: The WordCount Reducer
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // After the shuffle, all counts for the same word arrive together; add them up.
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        context.write(key, new IntWritable(sum));
    }
}
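Because the reduce logic just sums its input values, the same class can also serve as the combiner in Step 4. A matching MRUnit ReduceDriver check (same assumptions and caveats as the mapper test above) might look like this:

import java.util.Arrays;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Test;

public class WCReducerTest {

    @Test
    public void sumsCountsPerWord() throws Exception {
        ReduceDriver<Text, IntWritable, Text, IntWritable> driver =
                ReduceDriver.newReduceDriver(new WCReducer());
        // Three 1s for "hello" should collapse into a single (hello, 3) pair.
        driver.withInput(new Text("hello"),
                        Arrays.asList(new IntWritable(1), new IntWritable(1), new IntWritable(1)))
              .withOutput(new Text("hello"), new IntWritable(3))
              .runTest();
    }
}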
Step 4: The WordCount driver (RunJob)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class RunJob {

    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.newInstance(conf);

            // Pass the Configuration to the Job so that settings on conf take effect.
            Job job = Job.getInstance(conf);
            job.setJarByClass(RunJob.class);
            job.setJobName("wordcount");
            // The default input format is TextInputFormat (offset/line pairs);
            // uncomment the next line to read key/value text input instead.
            // job.setInputFormatClass(KeyValueTextInputFormat.class);

            // Mapper output types.
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setMapperClass(WCMapper.class);
            job.setReducerClass(WCReducer.class);
            // The reducer doubles as a combiner because summing counts is
            // associative and commutative.
            job.setCombinerClass(WCReducer.class);
            // Three reduce tasks, so the result is written to three part-r-* files.
            job.setNumReduceTasks(3);

            // Final (reducer) output types.
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            FileInputFormat.addInputPath(job, new Path("/data/"));

            // The output directory must not exist, so remove any leftover from a previous run.
            Path output = new Path("/wc");
            if (fs.exists(output)) {
                fs.delete(output, true);
            }
            FileOutputFormat.setOutputPath(job, output);

            boolean flag = job.waitForCompletion(true);
            if (flag) {
                System.out.println("Job finished!");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
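To run the job, package WCMapper, WCReducer and RunJob into a jar and submit it with the hadoop command line, e.g. hadoop jar wordcount.jar RunJob (the jar name here is arbitrary; prefix RunJob with its package name if the classes are not in the default package). With three reduce tasks configured, the word counts end up in /wc/part-r-00000 through /wc/part-r-00002.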