import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class NewVersionWordCount {

    public static class MapClass extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        protected void setup(Context context) {
            // Called once per map task, before the first map() call.
            System.out.println("mapper setup:" + context.getJobName());
        }

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException { // map function
            // Split the input line on whitespace and emit (word, 1) for each token.
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                context.write(word, one);
            }
        }

        @Override
        protected void cleanup(Context context) {
            // Called once per map task, after the last map() call.
            System.out.println("mapper cleanup:" + context.getUser());
        }
    }

    public static class ReduceClass extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void setup(Context context) {
            System.out.println("reduce setup:" + context.getUser());
        }

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException { // reduce function
            // Sum all the counts emitted for this word.
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            context.write(key, new IntWritable(sum));
        }

        @Override
        protected void cleanup(Context context) {
            System.out.println("reduce cleanup:" + context.getUser());
        }
    }

    public static void main(String[] args) throws Exception {
        String inputPath = "/user/User/input";
        String outPath = "/user/User/output";
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf); // replaces the deprecated new Job(conf)
        job.setJobName("wordCount");
        job.setJarByClass(NewVersionWordCount.class);
        job.setMapperClass(MapClass.class);
        job.setReducerClass(ReduceClass.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        /* Optional: delete an existing output directory so the job does not fail on resubmission.
        FileSystem fileSystem = FileSystem.get(conf);
        if (fileSystem.exists(new Path(outPath))) {
            fileSystem.delete(new Path(outPath), true);
        }*/
        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outPath));
        boolean complete = job.waitForCompletion(true);
        if (!complete) {
            throw new RuntimeException("word count job failed");
        }
    }
}
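Because the reduce logic is a simple commutative and associative sum, the same ReduceClass can also be registered as a combiner to shrink the data shuffled between map and reduce tasks. A minimal sketch of the extra driver line, assuming the job object from the main method above:

// Run ReduceClass on each mapper's local output before the shuffle.
job.setCombinerClass(ReduceClass.class);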
This post presents a new-API version of the WordCount program implemented with Hadoop. The program uses the MapReduce framework to process large volumes of text and count how often each word appears. It walks through the Mapper and Reducer implementations and shows how to set the job parameters and configure the input and output paths.
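Instead of hard-coding the input and output paths, the driver can also be written against Hadoop's Tool/ToolRunner interface so that generic options (for example -D property=value) are parsed automatically and the paths are taken from the command line. The sketch below is not part of the original listing; the class name WordCountDriver and the argument handling are illustrative only, and it reuses the MapClass and ReduceClass defined above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

// Hypothetical ToolRunner-based driver (not from the original post).
public class WordCountDriver extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(getConf(), "wordCount");
        job.setJarByClass(WordCountDriver.class);
        job.setMapperClass(NewVersionWordCount.MapClass.class);
        job.setCombinerClass(NewVersionWordCount.ReduceClass.class);
        job.setReducerClass(NewVersionWordCount.ReduceClass.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));   // input path from the command line
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // output path from the command line
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new WordCountDriver(), args));
    }
}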