需求:有大量的文本(文档、网页),需要建立搜索索引
输入:大量文本文件(文档、网页) 输出:每个单词到各文件出现次数的倒排索引
分析:
分两次MapReduce作业完成,第一次作业的预期输出
atguigu--a.txt 3
atguigu--b.txt 2
atguigu--c.txt 2
pingping--a.txt 1
pingping--b.txt 3
pingping--c.txt 1
ss--a.txt 2
ss--b.txt 1
ss--c.txt 1
第二次预期输出
atguigu c.txt-->2 b.txt-->2 a.txt-->3
pingping c.txt-->1 b.txt-->3 a.txt-->1
ss c.txt-->1 b.txt-->1 a.txt-->2
1)第一次处理
(1)第一次处理,编写OneIndexMapper
package com.lzz.mapreduce.index;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
public class OneIndexMapper extends Mapper<LongWritable, Text, Text, IntWritable>{
// Name of the input file backing this mapper's split; set once in setup().
String name;
// Reused output key object (avoids per-record allocation, standard Hadoop idiom).
Text k=new Text();
// Reused output value: constant count of 1 per word occurrence.
IntWritable v=new IntWritable(1);
@Override
protected void setup(Mapper<LongWritable, Text, Text, IntWritable>.Context context)
        throws IOException, InterruptedException {
    // Resolve, once per task, the name of the file this split was read from.
    // NOTE(review): assumes the input format produces FileSplit instances
    // (true for the default FileInputFormat family) — the cast fails otherwise.
    name = ((FileSplit) context.getInputSplit()).getPath().getName();
}
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable,