1.eclipse创建一个新的java project,记得引入hadoop的jar包。
2.编写程序如下:
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Classic MapReduce word count: reads text files from {@code INPUT_DIR},
 * splits each line into whitespace-separated words, and writes each word
 * with its total occurrence count to {@code OUTPUT_DIR}.
 */
public class WordCount {
    /** HDFS directory containing the input text files. */
    static final String INPUT_DIR = "hdfs://172.21.15.189:9000/input";
    /** HDFS directory the job writes results to; cleared before each run. */
    static final String OUTPUT_DIR = "hdfs://172.21.15.189:9000/output";

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path outputPath = new Path(OUTPUT_DIR);

        // MapReduce refuses to start when the output directory already exists,
        // so remove any leftovers from a previous run first.
        outputPath.getFileSystem(conf).delete(outputPath, true);

        // new Job(conf, name) is deprecated; Job.getInstance is the supported
        // factory in the org.apache.hadoop.mapreduce API.
        Job job = Job.getInstance(conf, "WordCount");
        // Required so the cluster can locate the jar containing these classes.
        job.setJarByClass(WordCount.class);

        FileInputFormat.setInputPaths(job, INPUT_DIR);   // input path(s)
        FileOutputFormat.setOutputPath(job, outputPath); // output path

        job.setMapperClass(MyMapper.class);
        // The reducer is a pure sum with identical key/value types, so it is
        // safe to reuse as a combiner for local pre-aggregation.
        job.setCombinerClass(MyReducer.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // Propagate success/failure to the shell instead of ignoring it.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /**
     * Mapper: emits (word, 1) for every whitespace-separated token of a line.
     */
    static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        private static final LongWritable ONE = new LongWritable(1);
        // Reused across map() calls to avoid per-record allocation.
        private final Text word = new Text();

        @Override
        protected void map(LongWritable offset, Text line,
                Mapper<LongWritable, Text, Text, LongWritable>.Context context)
                throws IOException, InterruptedException {
            // Split on runs of whitespace (tabs, multiple spaces) rather than a
            // single space, and skip empty tokens so they are never counted.
            for (String token : line.toString().split("\\s+")) {
                if (!token.isEmpty()) {
                    word.set(token);
                    context.write(word, ONE);
                }
            }
        }
    }

    /**
     * Reducer (also used as combiner): sums the counts for each word.
     */
    static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        // Reused output value to avoid per-key allocation.
        private final LongWritable total = new LongWritable();

        @Override
        protected void reduce(Text word, Iterable<LongWritable> counts,
                Reducer<Text, LongWritable, Text, LongWritable>.Context context)
                throws IOException, InterruptedException {
            long sum = 0L;
            for (LongWritable count : counts) {
                sum += count.get();
            }
            total.set(sum);
            context.write(word, total);
        }
    }
}
对于运行时出现权限问题报错: