Next, let's find the highest access count among the IPs.
(1) The idea is to compare the count of each IP, keeping the larger value at every step, so that the maximum is output at the end. Because map() itself emits nothing, each map task writes its local maximum exactly once from cleanup(), which the framework calls after the last record of the split has been processed; the single reducer then compares those per-task candidates the same way and writes the global maximum from its own cleanup(). Note that as written the job outputs only the maximum count itself, not the IP it belongs to.
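For reference, the mapper expects the tab-separated ip/count lines produced by the earlier counting job, for example (the values here are purely illustrative):

106.39.41.166	18
110.75.173.35	7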
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Kpi_IP_TopCount {

    public static class TopMapper extends Mapper<Object, Text, LongWritable, NullWritable> {
        // Running maximum seen by this map task.
        private long max = Long.MIN_VALUE;

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // Skip lines containing a backslash (presumably malformed records).
            if (value.toString().indexOf("\\") == -1) {
                // Each line is "ip<TAB>count"; field 1 is the count.
                String[] fields = value.toString().split("\t");
                long temp = Long.parseLong(fields[1]);
                if (temp > max) {
                    max = temp;
                }
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Emit this task's maximum exactly once, after all records have been seen.
            context.write(new LongWritable(max), NullWritable.get());
        }
    }

    public static class TopReducer extends Reducer<LongWritable, NullWritable, LongWritable, NullWritable> {
        // Global maximum across the candidates emitted by all map tasks.
        private long max = Long.MIN_VALUE;

        @Override
        public void reduce(LongWritable key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            long temp = key.get();
            if (temp > max) {
                max = temp;
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Write the single global maximum.
            context.write(new LongWritable(max), NullWritable.get());
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "ip count topCount");
        job.setJarByClass(Kpi_IP_TopCount.class);
        job.setMapperClass(TopMapper.class);
        // Map output types default to the job output types set below.
        //job.setMapOutputKeyClass(LongWritable.class);
        //job.setMapOutputValueClass(NullWritable.class);
        //job.setCombinerClass(TopReducer.class);
        job.setReducerClass(TopReducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.addInputPath(job, new Path("hdfs://119.29.174.43:9000/user/hadoop/kpi_ip_log_Input"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://119.29.174.43:9000/user/hadoop/kpi_ip_log_OutTopCount"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
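To run the job, package the class into a jar, submit it, and then inspect the reducer output; the jar name below is just a placeholder:

hadoop jar kpi.jar Kpi_IP_TopCount
hdfs dfs -cat /user/hadoop/kpi_ip_log_OutTopCount/part-r-00000

With the default single reducer, part-r-00000 contains exactly one line: the largest access count.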