1. Required jar files:
hadoop-0.20.2-core.jar
commons-cli-1.2.jar
commons-logging-1.2.jar
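(These can typically be taken from the Hadoop distribution itself: the core jar sits in the install root and the commons jars under its lib/ directory; use whichever versions your Hadoop release bundles.)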
2. Code:
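The listing below wires together three pieces: a custom InputFormat (MyInputFormat/MyRecordReader) that splits each input line at the first comma into a key/value pair, a mapper that swaps key and value, and a reducer that joins all values for a key into one comma-separated string. A worked input/output example follows the listing in section 3.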
package com.demo;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class Demo {
    // Delete the output path recursively so the job can be re-run;
    // FileOutputFormat fails if the output directory already exists.
    public static boolean deleteHDFSFile(String dst) throws IOException {
        Configuration config = new Configuration();
        FileSystem hdfs = FileSystem.get(config);
        Path path = new Path(dst);
        boolean isDeleted = hdfs.delete(path, true);
        hdfs.close();
        return isDeleted;
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Ship this jar to the cluster so the task JVMs can load our classes.
        conf.set("mapred.jar", "demo.jar");
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: Demo <in> <out>");
            System.exit(2);
        }
        String in = otherArgs[0];
        String out = otherArgs[1];
        Job job = new Job(conf, "Demo");
        job.setJarByClass(com.demo.Demo.class);
        // Clear any previous output before submitting the job.
        deleteHDFSFile(out);
        FileInputFormat.addInputPath(job, new Path(in));
        FileOutputFormat.setOutputPath(job, new Path(out));
        job.setMapperClass(com.demo.MapClass.class);
        job.setReducerClass(com.demo.Reduce.class);
        // Use our custom InputFormat, which splits each line at the first comma.
        job.setInputFormatClass(MyInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

class MapClass extends Mapper<Text, Text, Text, Text> {
    // Swap key and value: the original value becomes the grouping key.
    @Override
    public void map(Text key, Text value, Context context)
            throws IOException, InterruptedException {
        System.out.println("key:" + key + ",value:" + value);
        context.write(value, key);
    }
}

class Reduce extends Reducer<Text, Text, Text, Text> {
    // Join all values for a key into a single comma-separated string.
    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        StringBuilder csv = new StringBuilder();
        for (Text value : values) {
            if (csv.length() > 0) csv.append(',');
            csv.append(value.toString());
        }
        System.out.println("key:" + key + ",csv:" + csv);
        context.write(key, new Text(csv.toString()));
    }
}

class MyInputFormat extends FileInputFormat<Text, Text> {
    // Only split files that are not compressed; if a codec matches the file,
    // it must be processed as a single split.
    @Override
    protected boolean isSplitable(JobContext context, Path file) {
        CompressionCodec codec =
            new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
        return codec == null;
    }

    @Override
    public RecordReader<Text, Text> createRecordReader(InputSplit split,
            TaskAttemptContext context) throws IOException, InterruptedException {
        return new MyRecordReader(context.getConfiguration());
    }
}

class MyRecordReader extends RecordReader<Text, Text> {
    private final LineRecordReader lineRecordReader;
    private byte separator = (byte) ',';
    private Text innerValue;
    private Text key;
    private Text value;

    public MyRecordReader(Configuration conf) {
        // Delegate the actual line reading to Hadoop's LineRecordReader;
        // this class only splits each line at the separator byte.
        lineRecordReader = new LineRecordReader();
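        // Note: conf is currently unused. A possible extension (an assumption,
        // not part of the original code) would be to make the separator
        // configurable, e.g.:
        //   String sep = conf.get("myrecordreader.separator", ",");
        //   this.separator = (byte) sep.charAt(0);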
    }

    @Override
    public void close() throws IOException {
        lineRecordReader.close();
    }

    @Override
    public Text getCurrentKey() throws IOException, InterruptedException {
        return key;
    }

    @Override
    public Text getCurrentValue() throws IOException, InterruptedException {
        return value;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        return lineRecordReader.getProgress();
    }

    @Override
    public void initialize(InputSplit genericSplit, TaskAttemptContext context)
            throws IOException, InterruptedException {
        lineRecordReader.initialize(genericSplit, context);
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        byte[] line = null;
        int lineLen = -1;
        if (lineRecordReader.nextKeyValue()) {
            innerValue = lineRecordReader.getCurrentValue();
            line = innerValue.getBytes();
            lineLen = innerValue.getLength();
        } else {
            return false;
        }
        if (line == null)
            return false;
        if (key == null)
            key = new Text();
        if (value == null)
            value = new Text();
        // Split the line at the first separator byte into key and value.
        int pos = findSeparator(line, 0, lineLen, this.separator);
        setKeyValue(key, value, line, lineLen, pos);
        return true;
    }

    // Return the index of the first occurrence of sep in
    // utf[start..start+length), or -1 if it is not present.
    public int findSeparator(byte[] utf, int start, int length, byte sep) {
        for (int i = start; i < (start + length); ++i) {
            if (utf[i] == sep) {
                return i;
            }
        }
        return -1;
    }

    // If no separator was found, the whole line becomes the key and the value
    // is empty; otherwise split the line around the separator.
    public void setKeyValue(Text key, Text value, byte[] line,
            int lineLen, int pos) {
        if (pos == -1) {
            key.set(line, 0, lineLen);
            value.set("");
        } else {
            key.set(line, 0, pos);
            value.set(line, pos + 1, lineLen - pos - 1);
        }
    }
}
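
3. Example:
Given an input file with the following lines (sample data for illustration, not from the original job):
apple,1
banana,2
cherry,1
MyRecordReader emits the pairs (apple, 1), (banana, 2) and (cherry, 1). The mapper swaps each pair, so the reducer sees key "1" with values {apple, cherry} and key "2" with values {banana}, and writes (with TextOutputFormat's default tab separator):
1	apple,cherry
2	banana
The job itself would be launched with something like:
hadoop jar demo.jar com.demo.Demo <in> <out>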