Hadoop——MapReduce 的案例(二)(自定义 InputFormat)

该博客介绍了如何使用Hadoop MapReduce处理一个特定需求:统计文件中奇偶行的和。通过自定义MyRecordReader和MyInputFormat,实现了将行号作为key,行内容作为value进行映射,然后在reduce阶段分别计算奇数行和偶数行的总和。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

案例二

现有一个文件,需要分别统计奇数行与偶数行的和,即:统计第 1、3、5…行的和,以及第 2、4、6…行的和
在这里插入图片描述
分析:

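k1 是行号,v1 是该行的内容(行记录)

<k1,v1>----map----<k2,v2>----reduce----<k3,v3>
<1,12>----map----<奇数:,12>----reduce----<奇数:,奇数行之和>
<2,13>----map----<偶数:,13>----reduce----<偶数:,偶数行之和>
<3,24>----map----<奇数:,24>
其中 reduce 的输入为 <k2,[v,v,v,…]>,即同一个 key(奇数: 或 偶数:)下所有行的值;输出的 num 为这些值的总和。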

MyRecordReader


/**
 * Record reader that emits one record per text line, using the 1-based line
 * number as the key and the line content as the value.
 *
 * <p>Line numbering restarts at 1 for every reader instance, so the numbers are
 * only file-wide line numbers when each file is read as a single split (which is
 * what {@code MyInputFormat.isSplitable} returning {@code false} guarantees).
 */
public class MyRecordReader extends RecordReader<LongWritable, Text>{
	/** Byte offset of the split inside the file. */
	private long start;
	/** Line number that will be assigned to the next line read (1-based). */
	private long pos;
	/** Byte offset just past the end of the split. */
	private long end;
	/** Number of bytes consumed from the split so far; drives {@link #getProgress()}. */
	private long bytesConsumed;
	private LineReader in;
	private FSDataInputStream fileIn;
	private LongWritable key;
	private Text value;
	
	/**
	 * Opens the file behind the split and positions the stream at the split start.
	 *
	 * @param split   the split to read; must be a {@code FileSplit}
	 * @param context task context supplying the job configuration
	 * @throws IOException if the file cannot be opened or positioned
	 */
	@Override
	public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
		FileSplit filesplit = (FileSplit) split;
		Path path = filesplit.getPath(); // path of the file backing this split
		start = filesplit.getStart();
		end = start + filesplit.getLength();
		
		Configuration conf = context.getConfiguration();
		FileSystem fs = path.getFileSystem(conf);
		fileIn = fs.open(path);
		fileIn.seek(start);
		in = new LineReader(fileIn);
		pos = 1;
		bytesConsumed = 0;
	}

	/**
	 * Reads the next line into the current value and its line number into the
	 * current key.
	 *
	 * @return {@code true} if a line was read, {@code false} at end of input
	 * @throws IOException if reading from the underlying stream fails
	 */
	@Override
	public boolean nextKeyValue() throws IOException, InterruptedException {
		if (key == null) {
			key = new LongWritable();
		}
		if (value == null) {
			value = new Text();
		}

		// readLine returns the number of bytes consumed (line terminator included);
		// 0 therefore means end of stream, while an empty line still yields > 0.
		int consumed = in.readLine(value);
		if (consumed == 0) {
			return false;
		}

		// Only advance the key once a line was really read, so the key never
		// points at a line that does not exist.
		key.set(pos);
		pos++;
		bytesConsumed += consumed;
		return true;
	}

	/** @return the line number of the most recently read line */
	@Override
	public LongWritable getCurrentKey() throws IOException, InterruptedException {
		return key;
	}

	/** @return the content of the most recently read line */
	@Override
	public Text getCurrentValue() throws IOException, InterruptedException {
		return value;
	}

	/**
	 * Reports how much of the split has been consumed.
	 *
	 * @return a value in {@code [0, 1]}; 0 for an empty split
	 */
	@Override
	public float getProgress() throws IOException, InterruptedException {
		long length = end - start;
		if (length <= 0) {
			return 0;
		}
		return Math.min(1.0f, bytesConsumed / (float) length);
	}

	/**
	 * Releases the underlying stream. Safe to call even if {@code initialize}
	 * failed part-way or was never called.
	 */
	@Override
	public void close() throws IOException {
		if (in != null) {
			// Closing the LineReader also closes the wrapped input stream.
			in.close();
		} else if (fileIn != null) {
			fileIn.close();
		}
	}
}

MyInputFormat


/**
 * File input format whose records are produced by {@link MyRecordReader}
 * (key = line number, value = line content).
 */
public class MyInputFormat extends FileInputFormat<LongWritable, Text> {

	/**
	 * Input files are never split, so each file is handled by exactly one
	 * record reader from its first byte to its last.
	 *
	 * @return always {@code false}
	 */
	@Override
	protected boolean isSplitable(JobContext context, Path file) {
		return false;
	}

	/**
	 * Supplies a fresh {@link MyRecordReader}; neither argument is inspected here.
	 *
	 * @param split   the split the reader will later be initialized with
	 * @param context the task attempt context
	 * @return a new, uninitialized record reader
	 */
	@Override
	public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context)
			throws IOException, InterruptedException {
		RecordReader<LongWritable, Text> reader = new MyRecordReader();
		return reader;
	}
}

map函数

/**
 * Routes every input line to one of two groups depending on the parity of its
 * line number, so the reducer can sum odd and even lines separately.
 */
public class MyMapper extends Mapper<LongWritable, Text, Text, Text>{

	// Output keys are created once per mapper instead of once per record;
	// context.write does not require a fresh key object for each call.
	private final Text evenKey = new Text("偶数:");
	private final Text oddKey = new Text("奇数:");

	/**
	 * Emits the line under the even key when the line number is divisible by 2,
	 * otherwise under the odd key.
	 *
	 * @param key     line number of the record
	 * @param value   content of the line, forwarded unchanged
	 * @param context sink for the (group, line) pair
	 */
	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		Text group = (key.get() % 2 == 0) ? evenKey : oddKey;
		context.write(group, value);
	}
}

reduce函数

/**
 * Sums the numeric values of all lines that share a key and writes the total.
 */
public class MyReducer extends Reducer<Text, Text, Text, LongWritable>{

	/**
	 * Parses each value as a whole number and writes {@code (key, sum)}.
	 *
	 * @param key     the group key produced by the mapper
	 * @param value   the line contents belonging to that group
	 * @param context sink for the (key, total) pair
	 * @throws NumberFormatException if a non-blank value is not a valid integer
	 */
	@Override
	protected void reduce(Text key, Iterable<Text> value, Reducer<Text, Text, Text, LongWritable>.Context context)
			throws IOException, InterruptedException {
		// Accumulate in a long: the output type is LongWritable, and an int
		// accumulator would overflow silently on large inputs.
		long sum = 0;
		for (Text t : value) {
			// Tolerate surrounding whitespace (e.g. a stray '\r' from CRLF files).
			String text = t.toString().trim();
			if (text.isEmpty()) {
				// A blank line contributes nothing instead of aborting the job.
				continue;
			}
			sum += Long.parseLong(text);
		}
		context.write(key, new LongWritable(sum));
	}
}

驱动类

/**
 * Job driver: wires {@code MyInputFormat}, {@code MyMapper} and
 * {@code MyReducer} together and runs the job.
 */
public class MyDriver {
	/** Input location used when no first argument is given. */
	private static final String DEFAULT_INPUT = "E:/data/singledouble/input/*";
	/** Output location used when no second argument is given. */
	private static final String DEFAULT_OUTPUT = "E:/data/singledouble/output";

	/**
	 * Configures and runs the job, then exits with 0 on success and 1 on failure.
	 *
	 * @param args optional: {@code args[0]} input path, {@code args[1]} output
	 *             path; the built-in defaults are used for missing arguments
	 * @throws Exception if the job cannot be configured or submitted
	 */
	public static void main(String[] args) throws Exception {
		Path input = new Path(args.length > 0 ? args[0] : DEFAULT_INPUT);
		Path output = new Path(args.length > 1 ? args[1] : DEFAULT_OUTPUT);

		Configuration conf = new Configuration();

		// The job refuses to start if the output directory already exists,
		// so remove any previous result first (recursively).
		FileSystem fs = output.getFileSystem(conf);
		if (fs.exists(output)) {
			fs.delete(output, true);
		}

		// Pass conf explicitly so the job uses the same configuration as above.
		Job job = Job.getInstance(conf, "singledouble");

		job.setJarByClass(MyDriver.class);
		job.setMapperClass(MyMapper.class);
		job.setReducerClass(MyReducer.class);

		job.setInputFormatClass(MyInputFormat.class);

		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(LongWritable.class);

		FileInputFormat.addInputPath(job, input);
		FileOutputFormat.setOutputPath(job, output);

		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}

}

运行结果

在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值