1. Required jar files:
hadoop-0.20.2-core.jar
commons-cli-1.2.jar
commons-logging-1.2.jar
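(These can typically be taken from the Hadoop distribution itself: the core jar sits in the install root and the commons jars under its lib/ directory; use whichever versions your Hadoop release bundles.)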
2. Code:
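The listing below wires together three pieces: a custom InputFormat (MyInputFormat/MyRecordReader) that splits each input line at the first comma into a key/value pair, a mapper that swaps key and value, and a reducer that joins all values for a key into one comma-separated string. A worked input/output example follows the listing in section 3.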
package com.demo;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class Demo {
    // Delete the output path recursively so the job can be re-run;
    // FileOutputFormat fails if the output directory already exists.
    public static boolean deleteHDFSFile(String dst) throws IOException {
        Configuration config = new Configuration();
        FileSystem hdfs = FileSystem.get(config);
        Path path = new Path(dst);
        boolean isDeleted = hdfs.delete(path, true);
        hdfs.close();
        return isDeleted;
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Ship this jar to the cluster so the task JVMs can load our classes.
        conf.set("mapred.jar", "demo.jar");
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: Demo <in> <out>");
            System.exit(2);
        }
        String in = otherArgs[0];
        String out = otherArgs[1];
        Job job = new Job(conf, "Demo");
        job.setJarByClass(com.demo.Demo.class);
        // Clear any previous output before submitting the job.
        deleteHDFSFile(out);
        FileInputFormat.addInputPath(job, new Path(in));
        FileOutputFormat.setOutputPath(job, new Path(out));
        job.setMapperClass(com.demo.MapClass.class);
        job.setReducerClass(com.demo.Reduce.class);
        // Use our custom InputFormat, which splits each line at the first comma.
        job.setInputFormatClass(MyInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

class MapClass extends Mapper<Text, Text, Text, Text> {
    // Swap key and value: the original value becomes the grouping key.
    @Override
    public void map(Text key, Text value, Context context)
            throws IOException, InterruptedException {
        System.out.println("key:" + key + ",value:" + value);
        context.write(value, key);
    }
}

class Reduce extends Reducer<Text, Text, Text, Text> {
    // Join all values for a key into a single comma-separated string.
    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        StringBuilder csv = new StringBuilder();
        for (Text value : values) {
            if (csv.length() > 0) csv.append(',');
            csv.append(value.toString());
        }
        System.out.println("key:" + key + ",csv:" + csv);
        context.write(key, new Text(csv.toString()));
    }
}

class MyInputFormat extends FileInputFormat<Text, Text> {
    // Only split files that are not compressed; if a codec matches the file,
    // it must be processed as a single split.
    @Override
    protected boolean isSplitable(JobContext context, Path file) {
        CompressionCodec codec =
            new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
        return codec == null;
    }

    @Override
    public RecordReader<Text, Text> createRecordReader(InputSplit split,
            TaskAttemptContext context) throws IOException, InterruptedException {
        return new MyRecordReader(context.getConfiguration());
    }
}

class MyRecordReader extends RecordReader<Text, Text> {
    private final LineRecordReader lineRecordReader;
    private byte separator = (byte) ',';
    private Text innerValue;
    private Text key;
    private Text value;

    public MyRecordReader(Configuration conf) {
        // Delegate the actual line reading to Hadoop's LineRecordReader;
        // this class only splits each line at the separator byte.
        lineRecordReader = new LineRecordReader();
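        // Note: conf is currently unused. A possible extension (an assumption,
        // not part of the original code) would be to make the separator
        // configurable, e.g.:
        //   String sep = conf.get("myrecordreader.separator", ",");
        //   this.separator = (byte) sep.charAt(0);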
    }

    @Override
    public void close() throws IOException {
        lineRecordReader.close();
    }

    @Override
    public Text getCurrentKey() throws IOException, InterruptedException {
        return key;
    }

    @Override
    public Text getCurrentValue() throws IOException, InterruptedException {
        return value;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        return lineRecordReader.getProgress();
    }

    @Override
    public void initialize(InputSplit genericSplit, TaskAttemptContext context)
            throws IOException, InterruptedException {
        lineRecordReader.initialize(genericSplit, context);
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        byte[] line = null;
        int lineLen = -1;
        if (lineRecordReader.nextKeyValue()) {
            innerValue = lineRecordReader.getCurrentValue();
            line = innerValue.getBytes();
            lineLen = innerValue.getLength();
        } else {
            return false;
        }
        if (line == null)
            return false;
        if (key == null)
            key = new Text();
        if (value == null)
            value = new Text();
        // Split the line at the first separator byte into key and value.
        int pos = findSeparator(line, 0, lineLen, this.separator);
        setKeyValue(key, value, line, lineLen, pos);
        return true;
    }

    // Return the index of the first occurrence of sep in
    // utf[start..start+length), or -1 if it is not present.
    public int findSeparator(byte[] utf, int start, int length, byte sep) {
        for (int i = start; i < (start + length); ++i) {
            if (utf[i] == sep) {
                return i;
            }
        }
        return -1;
    }

    // If no separator was found, the whole line becomes the key and the value
    // is empty; otherwise split the line around the separator.
    public void setKeyValue(Text key, Text value, byte[] line,
            int lineLen, int pos) {
        if (pos == -1) {
            key.set(line, 0, lineLen);
            value.set("");
        } else {
            key.set(line, 0, pos);
            value.set(line, pos + 1, lineLen - pos - 1);
        }
    }
}
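
3. Example:
Given an input file with the following lines (sample data for illustration, not from the original job):
apple,1
banana,2
cherry,1
MyRecordReader emits the pairs (apple, 1), (banana, 2) and (cherry, 1). The mapper swaps each pair, so the reducer sees key "1" with values {apple, cherry} and key "2" with values {banana}, and writes (with TextOutputFormat's default tab separator):
1	apple,cherry
2	banana
The job itself would be launched with something like:
hadoop jar demo.jar com.demo.Demo <in> <out>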