单词计数wordcount全部代码

最新推荐文章于 2022-01-01 10:17:28 发布

原创 · 最新推荐文章于 2022-01-01 10:17:28 发布 · 475 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#hdfs

hdfs 专栏收录该内容

11 篇文章

订阅专栏

package hadoop;
//单词计数wordcount全部代码
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Word-count MapReduce job.
 *
 * <p>Each input line is split on commas, every non-empty token is counted as
 * one occurrence of a word, and the reducer sums the occurrences per word.
 *
 * <p>Usage: {@code MapReduceWC <input path> <output path>}
 */
public class MapReduceWC {

    /**
     * Splits each input line on {@code ","} and emits {@code (word, 1)} for
     * every non-empty token.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        /** Constant count emitted for each token; shared because it is never modified. */
        private static final LongWritable ONE = new LongWritable(1L);

        /** Reused output key; avoids allocating a new Text per token. */
        private final Text word = new Text();

        /**
         * Emits one {@code (token, 1)} pair per non-empty comma-separated token.
         *
         * @param key     input key supplied by the framework (unused)
         * @param value   one line of input text
         * @param context sink for the emitted pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            for (String token : line.split(",")) {
                // Blank lines and leading/consecutive commas yield empty tokens;
                // those are not words and must not be counted.
                if (token.isEmpty()) {
                    continue;
                }
                word.set(token);
                context.write(word, ONE);
            }
        }
    }

    /**
     * Sums all counts received for a word and emits {@code (word, total)}.
     */
    public static class MyReduce extends Reducer<Text, LongWritable, Text, LongWritable> {

        /** Reused output value; avoids allocating a new LongWritable per key. */
        private final LongWritable total = new LongWritable();

        /**
         * Adds up every value for {@code k2} and writes the sum.
         *
         * @param k2      the word
         * @param v2s     the counts collected for that word
         * @param context sink for the emitted pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text k2, Iterable<LongWritable> v2s, Context context)
                throws IOException, InterruptedException {
            long count = 0L;
            for (LongWritable value : v2s) {
                count += value.get();
            }
            total.set(count);
            context.write(k2, total);
        }
    }

    /**
     * Configures and runs the job, then exits with 0 on success, 1 if the job
     * failed, or 2 if the arguments are missing.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws IOException            if job setup or submission fails
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if interrupted while waiting for the job
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length < 2) {
            System.err.println("Usage: " + MapReduceWC.class.getSimpleName() + " <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, MapReduceWC.class.getSimpleName());
        job.setJarByClass(MapReduceWC.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));

        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        job.setReducerClass(MyReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Propagate job success/failure to the caller instead of always exiting 0.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }

}