1 建立一个java项目,将hadoop依赖的包导入项目中
2 创建Mapper类
public class MapperClass extends Mapper<Object, Text, Text, IntWritable>{
IntWritable one = new IntWritable(1);
Text word = new Text();
protected void map(Object key, Text value,org.apache.hadoop.mapreduce.Mapper.Context context)
throws IOException, InterruptedException {
String string = value.toString();
StringTokenizer stringTokenizer = new StringTokenizer(string);
while(stringTokenizer.hasMoreTokens()){
word.set(stringTokenizer.nextToken());
context.write(word, one);
}
}
}
3 创建Reducer 类
public class ReducerClass extends Reducer<Text, IntWritable, Text, IntWritable>{
protected void reduce(Text key, Iterable<IntWritable> values,Context context)
throws IOException, InterruptedException {
int sum = 0;
for (IntWritable value : values) {
sum += value.get();
}
context.write(key, new IntWritable(sum));
}
}
注意:千万不要使用 Eclipse 的自动填充,它默认会把 Context 参数补全成下面这种带完整包名的形式。这种写法在做单元测试(MRUnit)时会导致断言失败,具体原因尚不明确——这个问题排查了很久。结论:一定不要在 Context 类名前加上包名,直接写 Context 即可。
// COUNTEREXAMPLE — do not copy. Eclipse auto-completion qualifies the Context
// parameter as the nested class below, which makes MRUnit assertions fail.
// Declare the parameter simply as "Context context" instead (as in ReducerClass above).
protected void reduce(Text key, Iterable<IntWritable> values,org.apache.hadoop.mapreduce.Reducer.Context context)
throws IOException, InterruptedException {
}
4 编写主类
/**
 * Word-count driver: wires MapperClass/ReducerClass into a MapReduce job.
 *
 * Usage: wordcount &lt;in&gt; &lt;out&gt; — reads text from the first path,
 * writes (word, count) pairs to the second. Exits 0 on success, 1 on job
 * failure, 2 on bad arguments.
 */
public class WordCount {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Strip generic Hadoop options (-D, -fs, ...) and keep the remaining args.
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        // new Job(Configuration, String) is deprecated; use the static factory.
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(MapperClass.class);
        // Summing is associative and commutative, so the reducer doubles as a combiner.
        job.setCombinerClass(ReducerClass.class);
        job.setReducerClass(ReducerClass.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
5 建立 Test 源码目录,引入 MRUnit 相关的包:mockito-all-1.8.0.jar 和 mrunit-1.0.0-hadoop2.jar,并加入 Classpath;再添加 JUnit 4 的库
6 建立MRTest,并编写测试代码
/**
 * MRUnit tests for the word-count job: mapper alone, reducer alone,
 * and the full map-reduce pipeline.
 */
public class MRTest {
    MapDriver<Object, Text, Text, IntWritable> mapDriver;
    // Renamed from the original typo "redeceDriver".
    ReduceDriver<Text, IntWritable, Text, IntWritable> reduceDriver;
    MapReduceDriver<Object, Text, Text, IntWritable, Text, IntWritable> mapReduceDriver;

    @Before
    public void setUp() {
        MapperClass mapper = new MapperClass();
        ReducerClass reducer = new ReducerClass();
        mapDriver = MapDriver.newMapDriver(mapper);
        reduceDriver = ReduceDriver.newReduceDriver(reducer);
        mapReduceDriver = MapReduceDriver.newMapReduceDriver(mapper, reducer);
    }

    /** One input line must produce one (word, 1) pair per token, in input order. */
    @Test
    public void testMapper() throws IOException {
        mapDriver.withInput(new IntWritable(1), new Text("du kai is a good boy"));
        mapDriver.withOutput(new Text("du"), new IntWritable(1))
                .withOutput(new Text("kai"), new IntWritable(1))
                .withOutput(new Text("is"), new IntWritable(1))
                .withOutput(new Text("a"), new IntWritable(1))
                .withOutput(new Text("good"), new IntWritable(1))
                .withOutput(new Text("boy"), new IntWritable(1));
        mapDriver.runTest();
    }

    /** Two counts of 1 for the same key must be summed to 2. */
    @Test
    public void testReduce() throws Exception {
        List<IntWritable> values = new ArrayList<IntWritable>();
        values.add(new IntWritable(1));
        values.add(new IntWritable(1));
        reduceDriver.withInput(new Text("6"), values)
                .withOutput(new Text("6"), new IntWritable(2))
                .runTest();
    }

    /** Full pipeline: output is sorted by word, with duplicate words aggregated. */
    @Test
    public void test() throws IOException {
        String line = "Dukai is a great boy is it not";
        List<Pair<Text, IntWritable>> out =
                mapReduceDriver.withInput(new IntWritable(1), new Text(line)).run();
        // Parameterized Pair construction — the original used raw types.
        List<Pair<Text, IntWritable>> expected = new ArrayList<Pair<Text, IntWritable>>();
        expected.add(new Pair<Text, IntWritable>(new Text("Dukai"), new IntWritable(1)));
        expected.add(new Pair<Text, IntWritable>(new Text("a"), new IntWritable(1)));
        expected.add(new Pair<Text, IntWritable>(new Text("boy"), new IntWritable(1)));
        expected.add(new Pair<Text, IntWritable>(new Text("great"), new IntWritable(1)));
        expected.add(new Pair<Text, IntWritable>(new Text("is"), new IntWritable(2)));
        expected.add(new Pair<Text, IntWritable>(new Text("it"), new IntWritable(1)));
        expected.add(new Pair<Text, IntWritable>(new Text("not"), new IntWritable(1)));
        ExtendedAssert.assertListEquals(expected, out);
    }
}
结果如下:
7 导出jar
8 上传jar到服务器
9 putty连上服务器,su到hadoop用户下,运行
hadoop jar /home/software/wc1.jar com.mapreduce.WordCount input/wc out/one
此前已经提前上传两个txt文件到input/wc
10 查看结果
MRUnit:http://mrunit.apache.org/