使用场景:
监控已输入的数据量和输出数据量的变化。比如数据清洗。
代码实现
package com.aura.hadoop.counter;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
* @author panghu
* @description 使用计数器统计被清洗的数据条数
* @create 2021-02-17-8:52
*/
/**
 * Mapper that uses Hadoop counters to track how many input records pass or
 * fail a simple ETL cleaning rule: a record is kept only if it has more than
 * {@value #MIN_FIELD_COUNT} space-separated fields; all other records are
 * counted and dropped. Kept records are emitted unchanged with a null value.
 */
public class CounterMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /** Counter group name under which both counters appear in the job report. */
    private static final String COUNTER_GROUP = "ETL";

    /** A record must have strictly more than this many fields to be kept. */
    private static final int MIN_FIELD_COUNT = 11;

    /** Delimiter separating fields in each input record. */
    private static final String FIELD_DELIMITER = " ";

    // Counters are resolved once in setup() and reused for every record,
    // avoiding a per-record lookup in map().
    private Counter pass;
    private Counter failed;

    /**
     * Resolves (or creates) the "pass" and "failed" counters in the
     * {@code ETL} group before any records are processed.
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        pass = context.getCounter(COUNTER_GROUP, "pass");
        failed = context.getCounter(COUNTER_GROUP, "failed");
    }

    /**
     * Classifies one input line: increments the pass counter and emits the
     * line unchanged when it has enough fields, otherwise increments the
     * failed counter and drops it.
     *
     * @param key   byte offset of the line in the input split (unused)
     * @param value the raw input line
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split(FIELD_DELIMITER);
        if (fields.length > MIN_FIELD_COUNT) {
            // Well-formed record: count it and pass it through.
            pass.increment(1);
            context.write(value, NullWritable.get());
        } else {
            // Too few fields: count it as cleaned out; nothing is emitted.
            failed.increment(1);
        }
    }
}


966

被折叠的评论
为什么被折叠?



