Common errors with job.setOutputFormatClass and FileOutputFormat.setOutputPath in Hadoop jobs

1. job.setOutputFormatClass(TextOutputFormat.class);

Here TextOutputFormat must be the new-API class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat, not the old-API org.apache.hadoop.mapred.TextOutputFormat.

2. FileOutputFormat.setOutputPath(job, tempIndexPath);

Likewise, FileOutputFormat must be the new-API class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat. Mixing the old org.apache.hadoop.mapred classes with the new-API Job is what triggers the errors; a minimal sketch with both calls follows.
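A minimal driver sketch with the correct new-API imports (the output path /tmp/out is a placeholder):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

// ... after Job job = Job.getInstance(conf); ...
job.setOutputFormatClass(TextOutputFormat.class);          // new-API TextOutputFormat
FileOutputFormat.setOutputPath(job, new Path("/tmp/out")); // new-API FileOutputFormat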

Q: In the following WordCount driver, the two lines
FileInputFormat.setInputPaths(job, new Path("/yjx/harry potter.txt"));
FileOutputFormat.setOutputPath(job, new Path("/yjx/result/" + job.getJobName()));
are flagged red in the IDE with a type mismatch:

package com.yjxxt.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;   // old-API class
import org.apache.hadoop.mapred.FileOutputFormat;  // old-API class
import org.apache.hadoop.mapreduce.Job;

import java.io.IOException;

public class WordCountJob {
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        // Load the configuration
        Configuration conf = new Configuration(true);
        // Run in local mode
        conf.set("mapreduce.framework.name", "local");
        // Create the job
        Job job = Job.getInstance(conf);
        // Set the job's main class
        job.setJarByClass(WordCountJob.class);
        // Set the job name
        job.setJobName("yjx-WordCount-" + System.currentTimeMillis());
        // Set the number of reduce tasks
        job.setNumReduceTasks(2);
        // Set the input path (where the data to process is read from)
        FileInputFormat.setInputPaths(job, new Path("/yjx/harry potter.txt"));
        // Set the output path (where the results are written)
        FileOutputFormat.setOutputPath(job, new Path("/yjx/result/" + job.getJobName()));
        // Set the map output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Set the mapper and reducer classes
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // Submit the job to the cluster and wait for completion
        job.waitForCompletion(true);
    }
}
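A: This is exactly the old/new API mix-up described above. The driver uses the new-API org.apache.hadoop.mapreduce.Job, but imports the old-API input/output classes, whose setInputPaths and setOutputPath expect a JobConf rather than a Job. Replacing the two imports fixes the red marks; the rest of the code can stay as-is:

// replace
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
// with
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;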
Q: Modify the following code so it no longer reports errors.

A: Two fixes are needed: the mapper was declared as Mapper<LongWritable, Text, Text, Text, Text> with five type parameters (Mapper takes exactly four: input key, input value, output key, output value), and import java.io.IOException; was missing even though map() declares it. The corrected version (DataUtil is assumed to be a project helper class in the same package):

package com.dajiangtai.hadoop.tv;

import java.io.IOException; // was missing: map() throws IOException

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class ParseAndFilterLog extends Configured implements Tool {

    // Mapper takes exactly four type parameters (the original had five, which does not compile)
    public static class ExtractTVMsgLogMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // DataUtil is assumed to parse the log line and write filtered
            // key/value pairs to the context
            DataUtil.transData(value.toString(), context);
        }
    }

    public int run(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: ParseAndFilterLog <in> <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "ParseAndFilterLog");
        job.setJarByClass(ParseAndFilterLog.class);
        job.setMapperClass(ExtractTVMsgLogMapper.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        // Use "@" instead of the default tab as the key/value separator in the output
        job.getConfiguration().set("mapreduce.output.textoutputformat.separator", "@");
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Configuration(), new ParseAndFilterLog(), args);
        System.exit(exitCode);
    }
}
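Once packaged, the job can be run along the lines of (the jar name tvlog.jar and the HDFS paths are placeholders):

hadoop jar tvlog.jar com.dajiangtai.hadoop.tv.ParseAndFilterLog /tv/input /tv/output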
Q: Even with all of the above set up correctly, the job still fails:

[root@hadoop04 ~]# hadoop jar film001.jar CleanDriver /film/input /film/outputs/cleandata
25/03/14 18:21:36 INFO client.RMProxy: Connecting to ResourceManager at hadoop04/192.168.100.104:8032
Exception in thread "main" org.apache.hadoop.mapred.FileAlreadyExistsException: Output directory hdfs://hadoop04:9000/film/input already exists
    at org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.checkOutputSpecs(FileOutputFormat.java:146)
    at org.apache.hadoop.mapreduce.JobSubmitter.checkSpecs(JobSubmitter.java:266)
    at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:139)
    at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1290)
    at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1287)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1762)
    at org.apache.hadoop.mapreduce.Job.submit(Job.java:1287)
    at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1308)
    at CleanDriver.main(CleanDriver.java:35)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hadoop.util.RunJar.run(RunJar.java:226)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:141)
[root@hadoop04 ~]#
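A: Note that the exception names hdfs://hadoop04:9000/film/input, i.e. the input path, as the output directory, even though /film/outputs/cleandata was passed second on the command line. That suggests CleanDriver (whose source is not shown here) wires the wrong argument into FileOutputFormat.setOutputPath. More generally, FileOutputFormat refuses to submit a job whose output directory already exists, so a common defensive pattern is to delete a stale output directory first. A minimal sketch, assuming the new-API imports; the delete is recursive and destructive, so only apply it to paths the job owns:

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// ... inside the driver, before job submission ...
Path out = new Path(args[1]);                 // make sure this is the OUTPUT argument
FileSystem fs = FileSystem.get(job.getConfiguration());
if (fs.exists(out)) {
    fs.delete(out, true);                     // recursively remove the old output dir
}
FileOutputFormat.setOutputPath(job, out);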