WordCount (word frequency counting)
package WordCount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WordCountMap extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // The key is the byte offset of the line in the input file; the value is the line itself.
        String line = value.toString();
        String[] words = line.split(" ");
        // Emit (word, 1) for every word on the line.
        for (String w : words) {
            context.write(new Text(w), new IntWritable(1));
        }
    }
}
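For example, the input line "hello tom" makes the mapper emit the pairs (hello, 1) and (tom, 1); the framework then groups all pairs by key before handing them to the reducer.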
package WordCount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Sum the partial counts for this word rather than counting elements,
        // so the logic stays correct even if the values are already pre-aggregated.
        int count = 0;
        for (IntWritable v : values) {
            count += v.get();
        }
        context.write(key, new IntWritable(count));
    }
}
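A side note, not part of the original job: because this reduce is a plain sum, the same class can optionally be registered as a combiner to pre-aggregate map output and cut shuffle traffic. One extra line in the driver enables it:

job.setCombinerClass(WordCountReduce.class);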
package WordCount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;

public class WordCountDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Point the client at the cluster; adjust the hostname to your environment.
        conf.set("yarn.resourcemanager.hostname", "node01");
        conf.set("fs.defaultFS", "hdfs://node01:9000/");

        Job job = Job.getInstance(conf);
        job.setJarByClass(WordCountDriver.class);
        job.setMapperClass(WordCountMap.class);
        job.setReducerClass(WordCountReduce.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // waitForCompletion() submits the job and blocks until it finishes.
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
For building the JAR in IDEA, see the following link:
https://blog.youkuaiyun.com/njxiaoxiao79/article/details/85679992
Upload the JAR to the virtual machine in Xshell:
# rz
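Then launch the job. This is a sketch, assuming the JAR is named wordcount.jar and the input and output paths are /wc/in and /wc/out (substitute your own JAR name and HDFS paths; the driver reads them from args[0] and args[1]):

hadoop jar wordcount.jar WordCount.WordCountDriver /wc/in /wc/out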
List the output directory and print the reducer's result file (part-r-00000):
hadoop fs -ls /wc/out/
hadoop fs -cat /wc/out/part-r-00000
Result:
TBL 3
ZDP 2
hello 11
jerry 1
kitty 1
rose 2
tom 2

For comparison, the same count in plain Java, without Hadoop:

import java.util.HashMap;
import java.util.Map;

public class Main {
    public static void main(String[] args) {
        String text = "hello tom\n" +
                "hello rose\n" +
                "hello jerry\n" +
                "hello TBL\n" +
                "hello tom\n" +
                "hello kitty\n" +
                "hello rose\n" +
                "hello TBL\n" +
                "hello ZDP\n" +
                "hello ZDP\n" +
                "hello TBL\n";
        Map<String, Integer> map = new HashMap<String, Integer>();
        // Split into lines, then each line into words, and count every word.
        for (String line : text.split("\n")) {
            for (String w : line.split(" ")) {
                if (map.containsKey(w)) {
                    map.put(w, map.get(w) + 1);
                } else {
                    map.put(w, 1);
                }
            }
        }
        // Print the counts.
        for (Map.Entry<String, Integer> e : map.entrySet()) {
            System.out.println(e.getKey() + "\t" + e.getValue());
        }
    }
}
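Running this prints the same totals as the MapReduce job above, though not necessarily in the same order: HashMap does not order its keys, while the MapReduce output is sorted because the framework sorts keys during the shuffle.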