案例二
现有一个文件,需要分别统计奇数行与偶数行的和,即:第 1、3、5…行的和,以及第 2、4、6…行的和
分析:
k1是行号,v1是行记录
<k1,v1>----map----<k2,v2>----reduce----<k3,v3>
1 12 key1,[v,v,v,…] 奇数:num
2 13 key2,[v,v,v,…] 偶数:num
3 24 key3,[v,v,v,…] 奇数:num
MyRecordReader
/**
 * Record reader that emits one record per input line, keyed by a 1-based line
 * number rather than by byte offset.
 *
 * <p>The line counter is reset to 1 in {@link #initialize}, so keys are
 * file-wide line numbers only when the whole file is delivered as one split.
 */
public class MyRecordReader extends RecordReader<LongWritable, Text> {
    /** Byte offset at which the split begins. */
    private long start;
    /** Line number assigned to the next record (starts at 1). */
    private long pos;
    /** Byte offset just past the end of the split. */
    private long end;
    /** Bytes consumed from the split so far; used to report progress. */
    private long bytesRead;
    private LineReader in;
    private FSDataInputStream fileIn;
    private LongWritable key;
    private Text value;

    /**
     * Opens the file behind the split, seeks to the split start and resets the
     * line counter.
     *
     * @param split   the split to read; cast to {@code FileSplit}
     * @param context task context supplying the job configuration
     * @throws IOException if the file cannot be opened or positioned
     */
    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
        FileSplit filesplit = (FileSplit) split;
        Path path = filesplit.getPath(); // path of the file backing this split
        start = filesplit.getStart();
        end = start + filesplit.getLength();
        Configuration conf = context.getConfiguration();
        FileSystem fs = path.getFileSystem(conf);
        fileIn = fs.open(path);
        fileIn.seek(start);
        in = new LineReader(fileIn);
        pos = 1;
        bytesRead = 0;
    }

    /**
     * Reads the next line into the current value and stores its line number in
     * the current key.
     *
     * @return {@code true} if a line was read, {@code false} once the reader
     *         reports that no more bytes are available
     */
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        if (key == null) {
            key = new LongWritable();
        }
        if (value == null) {
            value = new Text();
        }
        key.set(pos);
        // readLine returns the number of bytes consumed; 0 is treated as end of input.
        int consumed = in.readLine(value);
        if (consumed == 0) {
            return false;
        }
        bytesRead += consumed;
        pos++;
        return true;
    }

    /** @return the line number of the most recently read line */
    @Override
    public LongWritable getCurrentKey() throws IOException, InterruptedException {
        return key;
    }

    /** @return the text of the most recently read line */
    @Override
    public Text getCurrentValue() throws IOException, InterruptedException {
        return value;
    }

    /**
     * Reports the fraction of the split consumed so far.
     *
     * @return a value in {@code [0, 1]}; {@code 0} for an empty split
     */
    @Override
    public float getProgress() throws IOException, InterruptedException {
        long length = end - start;
        if (length <= 0) {
            return 0.0f;
        }
        // Clamp because reading continues to end of stream, not to the split end.
        return Math.min(1.0f, bytesRead / (float) length);
    }

    /**
     * Releases the underlying stream. Safe to call even if {@link #initialize}
     * failed before the line reader was created.
     */
    @Override
    public void close() throws IOException {
        if (in != null) {
            in.close();
        } else if (fileIn != null) {
            fileIn.close();
        }
    }
}
MyInputFormat
/**
 * File input format whose records are produced by {@link MyRecordReader} and
 * whose files are never divided into multiple splits.
 */
public class MyInputFormat extends FileInputFormat<LongWritable, Text> {

    /**
     * Disables splitting for every file.
     *
     * @return always {@code false}
     */
    @Override
    protected boolean isSplitable(JobContext context, Path file) {
        return false;
    }

    /**
     * Supplies a fresh {@link MyRecordReader}; the arguments are not inspected here.
     *
     * @return a new, not yet initialized record reader
     */
    @Override
    public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        RecordReader<LongWritable, Text> reader = new MyRecordReader();
        return reader;
    }
}
map函数
/**
 * Mapper that tags each input value with an "even" or "odd" key according to
 * the parity of the incoming numeric key.
 */
public class MyMapper extends Mapper<LongWritable, Text, Text, Text> {
    /** Output key for records whose input key is even; reused across calls. */
    private final Text evenKey = new Text("偶数:");
    /** Output key for records whose input key is odd; reused across calls. */
    private final Text oddKey = new Text("奇数:");

    /**
     * Emits the value unchanged under the even or odd key.
     *
     * @param key     numeric key of the record; only its parity is used
     * @param value   record content, forwarded as-is
     * @param context sink for the output pair
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Reuse the two constant keys instead of allocating a Text per record.
        Text outKey = (key.get() % 2 == 0) ? evenKey : oddKey;
        context.write(outKey, value);
    }
}
reduce函数
/**
 * Reducer that parses every value of a key as an integer and writes their sum.
 */
public class MyReducer extends Reducer<Text, Text, Text, LongWritable> {
    /**
     * Sums the numeric values grouped under {@code key}.
     *
     * <p>Values are trimmed before parsing and blank values are skipped, so
     * empty lines in the input do not abort the job.
     *
     * @param key     group key, written through unchanged
     * @param value   textual numbers to add up
     * @param context sink for the {@code (key, sum)} pair
     * @throws NumberFormatException if a non-blank value is not a valid integer
     */
    @Override
    protected void reduce(Text key, Iterable<Text> value, Context context)
            throws IOException, InterruptedException {
        // Accumulate in a long: the result is written as a LongWritable, and an
        // int accumulator could overflow before being widened.
        long sum = 0L;
        for (Text t : value) {
            String digits = t.toString().trim();
            if (digits.isEmpty()) {
                continue;
            }
            sum += Long.parseLong(digits);
        }
        context.write(key, new LongWritable(sum));
    }
}
驱动类
/**
 * Entry point that configures and submits the "singledouble" job.
 */
public class MyDriver {
    /**
     * Removes any existing output directory, wires up the job classes and
     * paths, runs the job and exits with 0 on success or 1 on failure.
     *
     * @param args unused
     * @throws Exception if file system access or job submission fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path inputPath = new Path("E:/data/singledouble/input/*");
        Path outputPath = new Path("E:/data/singledouble/output");

        // Delete any existing output directory so the job can write to it again.
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }

        // Build the job from the same configuration used for the file system.
        Job job = Job.getInstance(conf);
        job.setJobName("singledouble");
        job.setJarByClass(MyDriver.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setInputFormatClass(MyInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
运行结果