WordCount.java
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
//依次对应
//LongWritable-输入数据键类型(行号)
//Text-输入数据值类型(key-value对)
//Text-输出数据键类型
//IntWritable-输出数据值类型
//举例:输入数据:[1] [apple 1] 输出 [apple] [1]
public class WordCount extends Mapper<LongWritable, Text, Text, IntWritable>{
protected void map(LongWritable key, Text value, org.apache.hadoop.mapreduce.Mapper<LongWritable,Text,Text,IntWritable>.Context context)
throws java.io.IOException ,InterruptedException {
String line=value.toString();
StringTokenizer st=new StringTokenizer(line);
while(st.hasMoreTokens()){
String word=st.nextToken();
context.write(new Text(word), new IntWritable(1));//map的输出
}
};
}
WordCountReduce.java
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
//输入map以后的键-值形式
public class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable>{
protected void reduce(Text key, java.lang.Iterable<IntWritable> iterable, org.apache.hadoop.mapreduce.Reducer<Text,IntWritable,Text,IntWritable>.Context context)
throws java.io.IOException ,InterruptedException {
int sum=0;
//这里的iterable里是有多少个键就有多少个1,比如hello 1 //hello 1 hello 1 则iterable就是由3个1组成的迭代器
for(IntWritable i:iterable){
sum=sum+i.get();
}
context.write(key, new IntWritable(sum));
};
}
JobRun.java
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
/**
 * Driver for the word-count MapReduce job.
 *
 * <p>Usage: {@code hadoop jar wc.jar JobRun [generic options] [input [output]]}.
 * When the paths are omitted the job reads from {@code /usr/input/wc} and
 * writes to {@code /usr/output/wc}.
 */
public class JobRun {
    /** Input path used when no input argument is supplied. */
    private static final String DEFAULT_INPUT_PATH = "/usr/input/wc";

    /** Output path used when no output argument is supplied. */
    private static final String DEFAULT_OUTPUT_PATH = "/usr/output/wc";

    /**
     * Configures and submits the job, then exits with status 0 on success and
     * 1 on job failure.
     *
     * <p>Setup and submission errors are allowed to propagate out of
     * {@code main} instead of being printed and swallowed, so a job that never
     * ran cannot be reported as a successful (status 0) exit.
     *
     * @param args optional Hadoop generic options, followed by an optional
     *             input path and an optional output path
     * @throws IOException            if job setup or submission fails
     * @throws ClassNotFoundException if a job class cannot be loaded
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String [] args) throws IOException, ClassNotFoundException, InterruptedException{
        Configuration conf = new Configuration();

        // Let Hadoop consume its generic options (-D, -fs, ...) and hand back
        // whatever is left as the job's own arguments.
        String[] remaining = new GenericOptionsParser(conf, args).getRemainingArgs();
        String inputPath = remaining.length > 0 ? remaining[0] : DEFAULT_INPUT_PATH;
        String outputPath = remaining.length > 1 ? remaining[1] : DEFAULT_OUTPUT_PATH;

        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(JobRun.class);

        job.setMapperClass(WordCount.class);
        // The reducer only sums, so it can also pre-aggregate map output.
        job.setCombinerClass(WordCountReduce.class);
        job.setReducerClass(WordCountReduce.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Declare the final output types explicitly so they match what the
        // reducer writes instead of relying on Hadoop's defaults.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Input and output locations for the job.
        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // Block until the job finishes and report its result via the exit code.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
特别注意:
如果以前成功运行过,再次运行前需要先删除HDFS上的输出目录(/usr/output/wc),否则作业会因为输出目录已存在而报错
执行方法:
1.把上述类文件用Eclipse打包成jar包
2.命令:
/hadoop/bin/hadoop jar /home/hadoop/temp/wc.jar JobRun
/home/hadoop/temp/wc.jar为jar包在本地文件系统中的路径
输入文件要拷入hdfs文件系统所在目录
JobRun为主类名(即包含main方法的类)
3.去输出文件夹查看效果:
322

被折叠的 条评论
为什么被折叠?



