1. Requirement
Count the number of times each word occurs in a set of input files.
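For example, given the tab-separated input below (a made-up sample), the job should emit one line per word with its total count; `TextOutputFormat` separates key and value with a tab by default:

```
# sample input (words separated by tab characters)
hello	world
hello	hadoop

# expected output (word, count)
hadoop	1
hello	2
world	1
```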
2. Code Implementation
- 1. WordCountMapper.class
```java
package com.bigdata.surfilter.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * @Author liufu
 * @CreateTime 2016/7/25 15:57
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    Text k = null;
    IntWritable v = null;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Create the output key/value objects once and reuse them for every record
        k = new Text();
        v = new IntWritable();
    }

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split the line into words (tab-separated) and emit (word, 1) for each
        String[] fields = value.toString().split("\t");
        for (String word : fields) {
            k.set(word);
            v.set(1);
            context.write(k, v);
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        k = null;
        v = null;
    }
}
```
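Note that the mapper splits each line on tab characters, so the input files are expected to be tab-separated. If the words were instead separated by arbitrary whitespace, the split could be written like this (a variation on the code above, not part of the original):

```java
// Split on any run of whitespace (spaces and/or tabs) instead of a single tab
String[] fields = value.toString().split("\\s+");
```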
- 2. WordCountReduce.class
```java
package com.bigdata.surfilter.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * @Author liufu
 * @CreateTime 2016/7/25 16:11
 */
public class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    IntWritable v = null;

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        v = new IntWritable();
    }

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Sum all the 1s emitted by the mappers for this word
        int count = 0;
        for (IntWritable value : values) {
            count += value.get();
        }
        v.set(count);
        context.write(key, v);
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        v = null;
    }
}
```
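Since the reduce logic is a plain summation (associative and commutative), the same class could also be registered as a combiner to pre-aggregate counts on the map side and shrink the shuffle. This is an optional line for the driver, not part of the original code below:

```java
// Optional: run the reducer as a map-side combiner to reduce shuffle traffic
job.setCombinerClass(WordCountReduce.class);
```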
- 3. ApplicationRun.class
```java
package com.bigdata.surfilter;

import com.bigdata.surfilter.wordcount.WordCountMapper;
import com.bigdata.surfilter.wordcount.WordCountReduce;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;

/**
 * @Author liufu
 * @CreateTime 2016/7/25 16:35
 */
public class ApplicationRun {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://192.168.0.186:9000");
        // Impersonate the root user
        System.setProperty("HADOOP_USER_NAME", "root");

        Job job = new Job(conf, "wordCount");

        // Locate the jar through the driver class on the classpath
        job.setJarByClass(ApplicationRun.class);

        // Map-side and reduce-side classes for the job
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReduce.class);

        // Declare the map and reduce output types so the framework can instantiate them via reflection
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // How the job reads its input and writes its output
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // Where the input comes from; bind input directories with either setInputPaths or addInputPath
        // FileInputFormat.setInputPaths(job, new Path("/wordcount/input1/"), new Path("/wordcount/input2/"));
        FileInputFormat.addInputPath(job, new Path("/wordcount/input1/"));
        FileInputFormat.addInputPath(job, new Path("/wordcount/input2/"));

        // Where the output goes
        FileOutputFormat.setOutputPath(job, new Path("/wordcount/output/"));

        try {
            boolean b = job.waitForCompletion(true);
            System.exit(b ? 0 : 1);
        } catch (InterruptedException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }
}
```
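One caveat when re-running the job: FileOutputFormat refuses to write into an output directory that already exists. A small optional guard (my addition, not in the original driver) can delete it before the job is submitted:

```java
// Optional: delete an existing output directory before submitting the job,
// otherwise FileOutputFormat throws FileAlreadyExistsException on a re-run.
// Requires an extra import: org.apache.hadoop.fs.FileSystem
FileSystem fs = FileSystem.get(conf);
Path output = new Path("/wordcount/output/");
if (fs.exists(output)) {
    fs.delete(output, true); // true = delete recursively
}
```

After packaging the classes into a jar, the job can be submitted from a machine with Hadoop installed, for example with `hadoop jar wordcount.jar com.bigdata.surfilter.ApplicationRun` (the jar name here is only illustrative).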