自定义 OutputFormat —— 数据过滤
需求:过滤日志文件
把包含 itstaredu 的行输出到一个文件
把不包含 itstaredu 的行输出到另一个文件
/**
 * Custom output format that routes every record through a
 * {@code FileRecordWriter} rather than the default writer.
 */
public class FuncFileOutputFormat extends FileOutputFormat<Text, NullWritable> {

    /**
     * Creates the record writer used for the given task attempt.
     *
     * @param job the task attempt context, handed on to the writer
     * @return a new {@code FileRecordWriter}
     * @throws IOException          as declared by the overridden method
     * @throws InterruptedException as declared by the overridden method
     */
    @Override
    public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext job)
            throws IOException, InterruptedException {
        return new FileRecordWriter(job);
    }
}
public class FileRecordWriter extends RecordWriter<Text, NullWritable>{
FSDataOutputStream other = null;
FSDataOutputStream itstarlog = null;
Configuration conf = null;
//1、定义数据输出路径
public FileRecordWriter(TaskAttempContext job){
//需要配置信息
conf = job.getConfiguration();
//获取文件系统
FileSystem fs = FileSystem.get(conf);
//定义输出路径
itstarlog = fs.create(new Path("C:/outitstaredu/itstar.logs"));
other = fs.create(new Path("C:/outputother/other.logs"));
}
//2、数据输出
@Override
public void writer(Text key, NullWritable value) throws IOException, InterruptException{
//判断的话根据key
if(key.toString().contains("itstar")){
//写出道文件
itstarlog.write(key.getBytes());
}else if {
other.write(key.getBytes());
}
}
//3、关闭资源
@Override
public void close(TaskAttempContext context) throws IOException, InterruptException{
if(null != itstarlog){
itstarlog.close();
}
if(null != other){
other.close();
}
}
}
public class FileMapper extends Mapper<LongWritable, Text, Text, NullWritable>{
@Override
public void map(LongWritable key, Text value, Context context)
throw IOException, InterruptException{
//输出
context.write(value, NullWritable.get());
}
}
public class FileReducer extends Reducer<Text, NullWritable, Text, NullWritable>{
@Override
public void reduce(Text key, Interable<NullWritable> value, Context context)
throw IOException, InterruptException{
//输出
String k = key.toString;
context.write(new Text(k), NullWritable.get());
}
}
/**
 * Job driver: wires the mapper, reducer and custom output format together
 * and runs the job.
 */
public class FileDriver {

    /**
     * Configures and submits the job, then exits with 0 on success and 1 on failure.
     *
     * @param args command-line arguments (unused; paths are fixed below)
     * @throws IOException            if job setup or submission fails
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Build the job from the configuration created above instead of discarding it.
        Job job = Job.getInstance(conf);

        job.setJarByClass(FileDriver.class);
        job.setMapperClass(FileMapper.class);
        job.setReducerClass(FileReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // Use the custom output format.
        job.setOutputFormatClass(FuncFileOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path("C:/in"));
        FileOutputFormat.setOutputPath(job, new Path("c:/out"));

        // Report the job result through the process exit status.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}