大数据实战（下）_MapReduce实战

最新推荐文章于 2024-11-28 11:47:52 发布

原创 · 最新推荐文章于 2024-11-28 11:47:52 发布 · 871 阅读

2 ·

CC 4.0 BY-SA版权

文章标签：

#MapReduce

hadoop学习笔记专栏收录该内容

6 篇文章

订阅专栏

本文详细介绍了一个简单的Hadoop MapReduce实例——Linecount程序的实现过程。该程序用于统计输入文件中的行数，并通过MapReduce框架进行计算。文章提供了完整的Java代码示例，包括Mapper、Reducer和Driver模块的具体实现。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

大纲

演示实例讲解
演示编写MapReduce实例

MapReduce代码

创建 linecount Java 项目
代码如下：

package com.trendwise.java;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
public class Linecount {
    //This is the Mapper Module, i.e. Map.java
    public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable obj = new IntWritable(1);
        private Text words = new Text("Total Lines are");

        public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            output.collect(words, obj);
        }
    }
    // This is the Reducer Module, i.e. Reduce.java
    public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output,
                Reporter reporter) throws IOException {
            int sum1 = 0;
            while (values.hasNext()) {
                sum1 += values.next().get();
            }
            output.collect(key, new IntWritable(sum1));
        }
    }
    //This is the Driver Module
    public static void main(String[] args) throws Exception {
        JobConf config = new JobConf(Linecount.class);
        config.setJobName("Linecount");
        config.setOutputKeyClass(Text.class);
        config.setOutputValueClass(IntWritable.class);
        config.setMapperClass(Map.class);
        config.setCombinerClass(Reduce.class);
        config.setReducerClass(Reduce.class);
        config.setInputFormat(TextInputFormat.class);
        config.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.setInputPaths(config, new Path(args[0]));
        FileOutputFormat.setOutputPath(config, new Path(args[1]));
        JobClient.runJob(config);
    }
}