1 wordcount
2 Reverse sort
3 Custom partitioning (different rules write to different files)
4 Custom file output
5 Counting with file streams
This post covers item 4: custom file output.
1 Custom output format class
package com.wzt.mapreduce.custom;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Custom OutputFormat that writes records to two fixed HDFS files
 * instead of the usual part-r-xxxxx files.
 *
 * Key type:   Text         -- the word emitted by the mapper
 * Value type: LongWritable -- the byte offset of the line the word came from
 */
public class MyCustomOutputFormat extends FileOutputFormat<Text, LongWritable> {

    @Override
    public RecordWriter<Text, LongWritable> getRecordWriter(TaskAttemptContext job)
            throws IOException, InterruptedException {
        Configuration conf = job.getConfiguration();
        FileSystem hdfs = FileSystem.get(conf);
        // Open one stream per target file; records are routed between them
        // in write(). Note that every task that calls getRecordWriter() opens
        // the same two paths, so this sample assumes a single writer task.
        FSDataOutputStream os1 = hdfs.create(new Path("/wc/output1/file1.log"));
        FSDataOutputStream os2 = hdfs.create(new Path("/wc/output2/file2.log"));
        return new MyRecordWriter(os1, os2);
    }

    public static class MyRecordWriter extends RecordWriter<Text, LongWritable> {

        private FSDataOutputStream os1;
        private FSDataOutputStream os2;

        public MyRecordWriter(FSDataOutputStream os1, FSDataOutputStream os2) {
            this.os1 = os1;
            this.os2 = os2;
        }

        @Override
        public void write(Text key, LongWritable value) throws IOException, InterruptedException {
            // Route by the line offset carried in the value:
            // even offsets go to file1, odd offsets go to file2.
            long offset = Long.parseLong(value.toString());
            if (offset % 2 == 0) {
                os1.writeBytes(key.toString() + "\n");
            } else {
                os2.writeBytes(key.toString() + "\n");
            }
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            if (os1 != null) {
                os1.close();
            }
            if (os2 != null) {
                os2.close();
            }
        }
    }
}
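For comparison: when the goal is simply "route records to differently named files", Hadoop's built-in MultipleOutputs helper can do the same job without a custom OutputFormat. Below is a minimal, untested sketch of that alternative; the named outputs "even" and "odd" and the RoutingReducer class are illustrative names, not part of the code above.

// Driver side: register one named output per target file
// (requires import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs
//  and org.apache.hadoop.mapreduce.lib.output.TextOutputFormat).
MultipleOutputs.addNamedOutput(job, "even", TextOutputFormat.class, Text.class, LongWritable.class);
MultipleOutputs.addNamedOutput(job, "odd", TextOutputFormat.class, Text.class, LongWritable.class);

// Reducer side: route each record by the same even/odd offset rule.
public static class RoutingReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
    private MultipleOutputs<Text, LongWritable> mos;

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<Text, LongWritable>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        for (LongWritable v : values) {
            mos.write(v.get() % 2 == 0 ? "even" : "odd", key, v);
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        mos.close(); // flush the extra outputs
    }
}

MultipleOutputs writes files such as even-r-00000 inside the job's normal output directory, so it keeps the standard commit behavior; the custom OutputFormat above trades that away for full control over paths and file names.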
2 Mapper class that prepares the data
package com.wzt.mapreduce.custom;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class CIOMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String[] words = StringUtils.split(line, " ");
        for (String word : words) {
            // Emit each word together with the byte offset of its line;
            // the custom RecordWriter uses that offset to pick an output file.
            context.write(new Text(word), key);
        }
    }
}
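To sanity-check the mapper in isolation, an MRUnit test like the following can be used (MRUnit and JUnit are extra test dependencies, not used in the original post). For the line "hello world" read at byte offset 0, the mapper should emit ("hello", 0) and ("world", 0):

package com.wzt.mapreduce.custom;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.junit.Test;

public class CIOMapperTest {

    @Test
    public void emitsOneOffsetPerWord() throws Exception {
        MapDriver<LongWritable, Text, Text, LongWritable> driver =
                MapDriver.newMapDriver(new CIOMapper());
        driver.withInput(new LongWritable(0), new Text("hello world"))
              .withOutput(new Text("hello"), new LongWritable(0))
              .withOutput(new Text("world"), new LongWritable(0))
              .runTest();
    }
}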
3 Driver class (the map output is written out directly, so no reducer is used)
package com.wzt.mapreduce.custom;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CIORunner {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(CIORunner.class);
        job.setMapperClass(CIOMapper.class);
        // job.setReducerClass(CIOReducer.class); // not needed: this job has no reduce phase
        // Without this, Hadoop would still run the default identity reducer;
        // with 0 reduce tasks the map output goes straight to the OutputFormat.
        job.setNumReduceTasks(0);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Plug in the custom output format defined above.
        job.setOutputFormatClass(MyCustomOutputFormat.class);

        FileInputFormat.setInputPaths(job, "/wc/input/xiyou.txt");
        // FileOutputFormat still needs an output directory (for the _SUCCESS marker),
        // even though the records themselves go to the two files opened in getRecordWriter().
        FileOutputFormat.setOutputPath(job, new Path("/wc/outputcount"));
        // Local paths for testing on Windows:
        // FileInputFormat.setInputPaths(job, "D:\\wordcount\\wordcount.txt");
        // FileOutputFormat.setOutputPath(job, new Path("D:\\wordcount\\output"));

        job.waitForCompletion(true);
    }
}
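One thing to watch when re-running the job: FileOutputFormat refuses to start if the output directory already exists. A small pre-flight cleanup at the top of main() avoids that. This is a convenience sketch, not part of the original code; it needs an extra import of org.apache.hadoop.fs.FileSystem.

// Delete leftover output locations so reruns don't fail with
// FileAlreadyExistsException. "/wc/output1" and "/wc/output2" are
// overwritten by hdfs.create() anyway, but cleaning them keeps reruns tidy.
FileSystem fs = FileSystem.get(conf);
for (String dir : new String[] { "/wc/outputcount", "/wc/output1", "/wc/output2" }) {
    Path p = new Path(dir);
    if (fs.exists(p)) {
        fs.delete(p, true); // recursive delete
    }
}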