/*ORCMapper.java*/
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.hive.ql.io.orc.*;
import org.apache.hadoop.io.*;

/**
 * Map task that hands every input text line to {@link OrcSerde#serialize} and emits the
 * resulting {@link Writable} under a {@link NullWritable} key.
 */
public class ORCMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, NullWritable, Writable> {

  /** Serde used to wrap each record; created once per task in {@link #configure(JobConf)}. */
  OrcSerde serde;

  /**
   * Creates the serde instance used by {@link #map}.
   *
   * @param job the job configuration (not read here)
   */
  @Override
  public void configure(JobConf job) {
    serde = new OrcSerde();
  }

  /**
   * Emits one serialized row per input line.
   *
   * @param key input key supplied by the input format (ignored)
   * @param value the input line
   * @param output collector receiving the {@code (NullWritable, row)} pair
   * @param reporter progress reporter (unused)
   * @throws IOException if the collector fails
   */
  @Override
  public void map(
      LongWritable key,
      Text value,
      OutputCollector<NullWritable, Writable> output,
      Reporter reporter)
      throws IOException {
    // NOTE(review): the ObjectInspector argument is null. The ORC writer presumably needs an
    // inspector to derive the file schema -- confirm against OrcOutputFormat and supply a
    // struct inspector matching the intended columns if so.
    output.collect(NullWritable.get(), serde.serialize(value, null));
  }
}

/*ORCReducer.java*/
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

/** Identity reducer: forwards every value it receives, unchanged, under the incoming key. */
public class ORCReducer extends MapReduceBase
    implements Reducer<NullWritable, Writable, NullWritable, Writable> {

  /**
   * Emits each value of the group.
   *
   * <p>The previous implementation called {@code values.next()} exactly once, which forwarded
   * only the first value of the group and silently dropped the rest (and threw on an empty
   * iterator). All values are now forwarded.
   *
   * @param key the group key
   * @param values the values grouped under {@code key}
   * @param output collector receiving each {@code (key, value)} pair
   * @param reporter progress reporter (unused)
   * @throws IOException if the collector fails
   */
  @Override
  public void reduce(
      NullWritable key,
      Iterator<Writable> values,
      OutputCollector<NullWritable, Writable> output,
      Reporter reporter)
      throws IOException {
    while (values.hasNext()) {
      output.collect(key, values.next());
    }
  }
}

/*ORCDriver.java*/
import java.io.*;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.hive.ql.io.orc.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;

/** Configures and submits the map-only job that runs {@link ORCMapper} over a text input. */
public class ORCDriver {

  /** Input path used when no first command-line argument is given. */
  private static final String DEFAULT_INPUT_PATH =
      "hdfs://localhost:9000/path/to/ipdir/textfile";

  /** Output path used when no second command-line argument is given. */
  private static final String DEFAULT_OUTPUT_PATH =
      "hdfs://localhost:9000/path/to/opdir/orcfile";

  /**
   * Builds the job configuration and runs the job.
   *
   * @param args optional: {@code args[0]} is the input path, {@code args[1]} the output path;
   *     missing arguments fall back to the built-in defaults
   * @throws IOException if job submission or execution fails
   * @throws InterruptedException declared for compatibility with existing callers
   * @throws ClassNotFoundException declared for compatibility with existing callers
   */
  public static void main(String[] args)
      throws IOException, InterruptedException, ClassNotFoundException {
    String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
    String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;

    // JobConf(String) interprets its argument as a configuration resource, not a job name.
    // Construct from the driver class (so the job jar can be located) and name the job
    // explicitly instead.
    JobConf conf = new JobConf(ORCDriver.class);
    conf.setJobName("ORC_Generator");

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(Writable.class);
    conf.setOutputFormat(OrcOutputFormat.class);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    OrcOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setMapperClass(ORCMapper.class);
    System.out.println(OrcOutputFormat.getWorkOutputPath(conf));

    // Map-only job: no reduce phase is run.
    conf.setNumReduceTasks(0);

    // Let failures propagate so the process exits non-zero instead of printing a stack trace
    // and reporting success.
    JobClient.runJob(conf);
  }
}
// Reposted from: https://blog.51cto.com/houjt/1613169