WordCount (word frequency counting)
package WordCount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WordCountMap extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // The key is the byte offset of the line in the input file; the value is the line itself.
        String line = value.toString();
        String[] words = line.split(" ");
        // Emit (word, 1) for every word on the line.
        for (String w : words) {
            context.write(new Text(w), new IntWritable(1));
        }
    }
}
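For example, the input line "hello tom" makes the mapper emit the pairs (hello, 1) and (tom, 1); the framework then groups all pairs by key before handing them to the reducer.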
package WordCount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Sum the partial counts for this word rather than counting elements,
        // so the logic stays correct even if the values are already pre-aggregated.
        int count = 0;
        for (IntWritable v : values) {
            count += v.get();
        }
        context.write(key, new IntWritable(count));
    }
}
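A side note, not part of the original job: because this reduce is a plain sum, the same class can optionally be registered as a combiner to pre-aggregate map output and cut shuffle traffic. One extra line in the driver enables it:

job.setCombinerClass(WordCountReduce.class);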
package WordCount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;

public class WordCountDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Point the client at the cluster; adjust the hostname to your environment.
        conf.set("yarn.resourcemanager.hostname", "node01");
        conf.set("fs.defaultFS", "hdfs://node01:9000/");

        Job job = Job.getInstance(conf);
        job.setJarByClass(WordCountDriver.class);
        job.setMapperClass(WordCountMap.class);
        job.setReducerClass(WordCountReduce.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // waitForCompletion() submits the job and blocks until it finishes.
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
For building the JAR in IDEA, see the following link:
https://blog.youkuaiyun.com/njxiaoxiao79/article/details/85679992
Upload the JAR to the virtual machine in Xshell:
# rz
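Then launch the job. This is a sketch, assuming the JAR is named wordcount.jar and the input and output paths are /wc/in and /wc/out (substitute your own JAR name and HDFS paths; the driver reads them from args[0] and args[1]):

hadoop jar wordcount.jar WordCount.WordCountDriver /wc/in /wc/out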
List the output directory and print the reducer's result file (part-r-00000):
hadoop fs -ls /wc/out/
hadoop fs -cat /wc/out/part-r-00000
Result:
TBL 3
ZDP 2
hello 11
jerry 1
kitty 1
rose 2
tom 2

For comparison, the same count in plain Java, without Hadoop:

import java.util.HashMap;
import java.util.Map;

public class Main {
    public static void main(String[] args) {
        String text = "hello tom\n" +
                "hello rose\n" +
                "hello jerry\n" +
                "hello TBL\n" +
                "hello tom\n" +
                "hello kitty\n" +
                "hello rose\n" +
                "hello TBL\n" +
                "hello ZDP\n" +
                "hello ZDP\n" +
                "hello TBL\n";
        Map<String, Integer> map = new HashMap<String, Integer>();
        // Split into lines, then each line into words, and count every word.
        for (String line : text.split("\n")) {
            for (String w : line.split(" ")) {
                if (map.containsKey(w)) {
                    map.put(w, map.get(w) + 1);
                } else {
                    map.put(w, 1);
                }
            }
        }
        // Print the counts.
        for (Map.Entry<String, Integer> e : map.entrySet()) {
            System.out.println(e.getKey() + "\t" + e.getValue());
        }
    }
}
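Running this prints the same totals as the MapReduce job above, though not necessarily in the same order: HashMap does not order its keys, while the MapReduce output is sorted because the framework sorts keys during the shuffle.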