Several problems with Hadoop's job.setOutputKeyClass and job.setOutputValueClass

This post describes a problem the author hit while writing a MapReduce job, caused by how Hadoop's job.setOutputKeyClass and job.setOutputValueClass configure the output types, along with the fix: when the map output types and the reduce output types differ, job.setMapOutputKeyClass and job.setMapOutputValueClass must be used to configure the map output explicitly.



Yesterday I wrote a MapReduce job that kept failing, and I could not find the error. After spending a whole day on it today, I finally tracked it down: the problem was how Hadoop's job.setOutputKeyClass and job.setOutputValueClass configure the output types.


By default, job.setOutputKeyClass and job.setOutputValueClass set the output types for both the map phase and the reduce phase. In other words, the job only works without extra configuration when the map output types and the reduce output types are the same.

When the map output types differ from the reduce output types, you must call job.setMapOutputKeyClass and job.setMapOutputValueClass to set the map-phase output types explicitly.
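As an illustration (the class and field names below are my own, not from the original post), here is a minimal job where the mapper emits Text/IntWritable while the reducer emits Text/Text. Without the two setMapOutput* calls, the framework would assume the mapper also emits Text/Text and fail at runtime with a "Type mismatch in key/value from map" error:

package com.dajiangtai.hadoop.tv;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class OutputTypesDemo {

    // Map output types: Text / IntWritable.
    public static class DemoMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final Text word = new Text();
        private final IntWritable one = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String token : value.toString().split("\\s+")) {
                word.set(token);
                context.write(word, one);
            }
        }
    }

    // Reduce output types: Text / Text, different from the map output value type.
    public static class DemoReducer extends Reducer<Text, IntWritable, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new Text("count=" + sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "OutputTypesDemo");
        job.setJarByClass(OutputTypesDemo.class);
        job.setMapperClass(DemoMapper.class);
        job.setReducerClass(DemoReducer.class);

        // Map-phase output types: required here because they differ
        // from the job (reduce-phase) output types declared below.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Job output types, used for the reduce output.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}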


Related Q&A (appended below the post):

package com.dajiangtai.hadoop.tv;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class ParseAndFilterLog extends Configured implements Tool {

    // BUG: Mapper takes exactly four type parameters (KEYIN, VALUEIN, KEYOUT, VALUEOUT);
    // five are declared here.
    public static class ExtractTVMsgLogMapper
            extends Mapper<LongWritable, Text, Text, Text, Text> {
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException { // BUG: java.io.IOException is never imported
            DataUtil.transData(value.toString(), context); // DataUtil is an external helper class (not shown)
        }
    }

    public int run(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: ParseAndFilterLog <in> <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "ParseAndFilterLog");
        job.setJarByClass(ParseAndFilterLog.class);
        job.setMapperClass(ExtractTVMsgLogMapper.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        job.getConfiguration().set("mapreduce.output.textoutputformat.separator", "@");
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Configuration(), new ParseAndFilterLog(), args);
        System.exit(exitCode);
    }
}

Q: Modify this code so it no longer reports errors.
package com.dajiangtai.hadoop.tv;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class ParseAndFilterLog extends Configured implements Tool {

    public static class ExtractTVMsgLogMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // The real processing logic goes here.
            // Example: simply pass the original value through.
            context.write(new Text("key"), value);
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = this.getConf();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: ParseAndFilterLog <in> <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "ParseAndFilterLog");
        job.setJarByClass(ParseAndFilterLog.class);
        job.setMapperClass(ExtractTVMsgLogMapper.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        job.getConfiguration().set("mapreduce.output.textoutputformat.separator", "@");
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new ParseAndFilterLog(), args);
        System.exit(exitCode);
    }
}

Q: Merge this code with the previous snippet and revise it once more.
package com.dajiangtai.hadoop.tv;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

public class ParseAndFilterLog extends Configured implements Tool {

    public static class ExtractTVMsgLogMapper extends Mapper<LongWritable, Text, Text, Text> {
        private final Text outputKey = new Text();
        private final Text outputValue = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] parsedData = parseAndFilterData(value.toString());
            if (parsedData != null && parsedData.length == 7) {
                String keyPart = parsedData[0] + "@" + parsedData[1];
                String valuePart = String.join("@", parsedData[2], parsedData[3],
                        parsedData[4], parsedData[5], parsedData[6]);
                outputKey.set(keyPart);
                outputValue.set(valuePart);
                context.write(outputKey, outputValue);
            }
        }

        private String[] parseAndFilterData(String data) {
            // The real parsing logic goes here.
            return data.split(","); // Example: split on commas.
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: ParseAndFilterLog <input> <output>");
            return 1;
        }
        Job job = Job.getInstance(conf, "TV Log Parser");
        job.setJarByClass(ParseAndFilterLog.class);
        job.setMapperClass(ExtractTVMsgLogMapper.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setNumReduceTasks(0); // no Reducer
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new ParseAndFilterLog(), args);
        System.exit(exitCode);
    }
}

Q: Can the imports org.apache.hadoop.fs.Path, org.apache.hadoop.util.GenericOptionsParser, org.apache.hadoop.util.Tool, and org.apache.hadoop.util.ToolRunner be removed? They keep causing errors; please help me modify the code.
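One possible answer to that last question (mine, not from the original thread): Tool, ToolRunner, and GenericOptionsParser exist only to support the Tool driver pattern, so a plain main method can drop all three. Path, however, is still needed by FileInputFormat.addInputPath and FileOutputFormat.setOutputPath, so that import has to stay. A minimal map-only sketch under those assumptions, reusing the placeholder parse logic from the snippet above:

package com.dajiangtai.hadoop.tv;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path; // still required for the input/output paths
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ParseAndFilterLog {

    public static class ExtractTVMsgLogMapper extends Mapper<LongWritable, Text, Text, Text> {
        private final Text outputKey = new Text();
        private final Text outputValue = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Placeholder parse logic, same shape as the snippet above: split on commas.
            String[] fields = value.toString().split(",");
            if (fields.length == 7) {
                outputKey.set(fields[0] + "@" + fields[1]);
                outputValue.set(String.join("@", fields[2], fields[3], fields[4], fields[5], fields[6]));
                context.write(outputKey, outputValue);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: ParseAndFilterLog <input> <output>");
            System.exit(2);
        }
        Job job = Job.getInstance(new Configuration(), "TV Log Parser");
        job.setJarByClass(ParseAndFilterLog.class);
        job.setMapperClass(ExtractTVMsgLogMapper.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setNumReduceTasks(0); // map-only job, no Reducer
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The trade-off is that generic Hadoop options such as -D key=value are no longer parsed from the command line, since that is exactly what GenericOptionsParser and ToolRunner provided.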