说明
Hadoop为每个作业维护若干内置计数器,以描述多项指标。例如,某些计数器记录已处理的字节数和记录数,使用户可监控已处理的输入数据量和已产生的输出数据量。
计数器API
- 采用枚举的方法统计计数
enum MyCounter{MALFORMED,NORMAL}
//对枚举定义的自定义计数器加1
context.getCounter(MyCounter.MALFORMED).increment(1);
- 采用计数器组名称和计数器名称的方式统计
context.getCounter("counterGroup","counter").increment(1);
示例
统计文件中符合规则的数据行数和不符合规则的数据行数,每行按制表符切分后字段个数不少于3为符合规则
示例文件:
CounterDriver:驱动类
package com.xing.mr.Counter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver for the counter example: configures and submits a job that runs
 * {@link CounterMapper} with zero reduce tasks, reading a fixed local input
 * file and writing to a fixed local output directory.
 *
 * @Date 2019/4/13 20:03
 * @Created by dell
 */
public class CounterDriver {

    /**
     * Builds the job, clears any previous output directory, runs the job and
     * terminates the JVM with 0 on success or -1 on failure.
     *
     * @param args command-line arguments; not read by this method
     * @throws IOException            declared by the Hadoop file-system and job calls
     * @throws ClassNotFoundException declared by {@code Job.waitForCompletion}
     * @throws InterruptedException   declared by {@code Job.waitForCompletion}
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // NOTE(review): presumably needed so Hadoop can locate its native helpers
        // when running locally on Windows - confirm for your environment.
        System.setProperty("hadoop.home.dir", "F:\\hadoop-2.7.1");

        Configuration conf = new Configuration();
        FileSystem fileSystem = FileSystem.get(conf);
        Job counterJob = Job.getInstance(conf);

        // Class used to locate the job jar.
        counterJob.setJarByClass(CounterDriver.class);

        // Only a mapper is configured; the reduce task count is set to zero below.
        counterJob.setMapperClass(CounterMapper.class);

        // Key/value types emitted by the mapper.
        counterJob.setMapOutputKeyClass(Text.class);
        counterJob.setMapOutputValueClass(NullWritable.class);
        counterJob.setNumReduceTasks(0);

        FileInputFormat.setInputPaths(counterJob, new Path("E:\\hdfs\\input\\word.txt"));

        // Remove the output directory left by a previous run (recursive delete).
        Path outputDir = new Path("E:\\hdfs\\output");
        if (fileSystem.exists(outputDir)) {
            fileSystem.delete(outputDir, true);
        }
        FileOutputFormat.setOutputPath(counterJob, outputDir);

        // Block until the job finishes, then report the outcome.
        boolean succeeded = counterJob.waitForCompletion(true);
        System.out.println(succeeded);

        int exitCode = succeeded ? 0 : -1;
        System.exit(exitCode);
    }
}
CounterMapper:mapper处理类
package com.xing.mr.Counter;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper for the counter example: writes through every input line that has at
 * least three tab-separated fields, and counts accepted and rejected lines
 * using the counters "true" and "false" in the counter group "mapper".
 *
 * @Date 2019/4/13 21:16
 * @Created by dell
 */
public class CounterMapper extends Mapper<LongWritable,Text,Text, NullWritable> {

    /**
     * Validates one input line and emits it unchanged (with a null value) when
     * it passes validation; rejected lines produce no output.
     *
     * @param key     input key supplied by the framework; not read here
     * @param value   the input line
     * @param context task context used for output and counters
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        if (valitValue(line, context)) {
            context.write(value, NullWritable.get());
        }
    }

    /**
     * Checks whether a line satisfies the rule and records the outcome in a counter.
     *
     * @param valueString the line to check
     * @param context     task context whose "mapper" counter group is updated
     * @return {@code true} when the line splits into three or more fields on tab
     */
    private boolean valitValue(String valueString, Context context) {
        // Limit -1 keeps trailing empty fields, so they count toward the total.
        int fieldCount = valueString.split("\t", -1).length;
        boolean valid = fieldCount >= 3;

        // Exactly one of the two counters is incremented per line.
        context.getCounter("mapper", valid ? "true" : "false").increment(1);
        return valid;
    }
}
输出结果:图中红色地方