前面粗略统计了某些接口的pv,下面统计下ip的访问次数,并按次数从大到小显示出来.
仍以之前那份1万条、2.5M大小的日志作为统计样本。
(1)这是部分日志:
183.136.190.40 - - [18/Mar/2017:03:56:58 +0800] "GET / HTTP/1.1" 502 574 "-" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36"
183.39.91.88 - - [18/Mar/2017:11:06:25 +0800] "GET / HTTP/1.1" 200 964 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
183.39.91.88 - - [18/Mar/2017:11:06:25 +0800] "GET /miqizhuye/css/poposlides.css HTTP/1.1" 200 1855 "http://misbike.com/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
183.39.91.88 - - [18/Mar/2017:11:06:25 +0800] "GET /miqizhuye/js/jquery-1.8.3.min.js HTTP/1.1" 200 37522 "http://misbike.com/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
183.39.91.88 - - [18/Mar/2017:11:06:25 +0800] "GET /miqizhuye/js/poposlides.js HTTP/1.1" 200 1544 "http://misbike.com/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
183.39.91.88 - - [18/Mar/2017:11:06:26 +0800] "GET /miqizhuye/images/686474292697824904.jpg HTTP/1.1" 200 178452 "http://misbike.com/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
183.39.91.88 - - [18/Mar/2017:11:06:26 +0800] "GET /miqizhuye/images/56791768461410450.jpg HTTP/1.1" 200 193835 "http://misbike.com/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
183.39.91.88 - - [18/Mar/2017:11:06:26 +0800] "GET /miqizhuye/images/574881576306843042.jpg HTTP/1.1" 200 226224 "http://misbike.com/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
183.39.91.88 - - [18/Mar/2017:11:06:26 +0800] "GET /miqizhuye/images/308882832965651915.jpg HTTP/1.1" 200 272736 "http://misbike.com/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
183.39.91.88 - - [18/Mar/2017:11:06:26 +0800] "GET /miqizhuye/images/637465093667333887.jpg HTTP/1.1" 200 357482 "http://misbike.com/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
(2)统计ip和ip次数:
/**
 * Writable bean pairing an IP address with the number of times it was seen.
 *
 * <p>The natural ordering is by count, largest first. Equal counts are ordered by
 * IP so that the ordering is a consistent total order: {@code compareTo} returns 0
 * only for beans with the same count and the same IP, which keeps distinct IPs
 * from being treated as the same key when the bean is used as a sort key.
 */
public class Kpi_IPCountBean implements WritableComparable<Kpi_IPCountBean> {

    private String ip;
    private Integer ipcount;

    public String getIp() {
        return ip;
    }

    public void setIp(String ip) {
        this.ip = ip;
    }

    public Integer getIpcount() {
        return ipcount;
    }

    public void setIpcount(Integer ipcount) {
        this.ipcount = ipcount;
    }

    /**
     * Renders the bean as the ip, a tab, and the count.
     */
    @Override
    public String toString() {
        return this.ip + "\t" + this.ipcount;
    }

    /**
     * Sets both fields in one call.
     *
     * @param ip      the IP address
     * @param ipcount the number of occurrences
     */
    public void setIpCount(String ip, Integer ipcount) {
        this.ip = ip;
        this.ipcount = ipcount;
    }

    /**
     * Serializes the bean: the ip as a UTF string followed by the count as an int.
     * Both fields are expected to be set; a null ipcount fails on unboxing.
     */
    public void write(DataOutput out) throws IOException {
        out.writeUTF(ip);
        out.writeInt(ipcount);
    }

    /**
     * Deserializes the bean, reading the fields in the order {@link #write} emits them.
     */
    public void readFields(DataInput in) throws IOException {
        this.ip = in.readUTF();
        this.ipcount = in.readInt();
    }

    /**
     * Orders beans by count descending, then by ip ascending.
     *
     * @param o the bean to compare against; its count must be set
     * @return a negative value if this bean sorts first, a positive value if it
     *         sorts after {@code o}, and 0 when count and ip are both equal
     */
    public int compareTo(Kpi_IPCountBean o) {
        // Reversed receiver/argument gives the descending order on count.
        int byCount = o.getIpcount().compareTo(this.ipcount);
        if (byCount != 0) {
            return byCount;
        }
        // Tie-break so that two different IPs with the same count never compare equal.
        return nullToEmpty(this.ip).compareTo(nullToEmpty(o.getIp()));
    }

    /**
     * Two beans are equal when their counts match and their ips match
     * (a null ip is treated like an empty one, as in {@link #compareTo}).
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Kpi_IPCountBean)) {
            return false;
        }
        Kpi_IPCountBean other = (Kpi_IPCountBean) obj;
        boolean sameCount = ipcount == null ? other.ipcount == null : ipcount.equals(other.ipcount);
        return sameCount && nullToEmpty(ip).equals(nullToEmpty(other.ip));
    }

    /**
     * Hash code derived from the same fields {@link #equals} compares.
     */
    @Override
    public int hashCode() {
        int countHash = ipcount == null ? 0 : ipcount.hashCode();
        return 31 * nullToEmpty(ip).hashCode() + countHash;
    }

    /** Maps null to the empty string so comparisons never dereference null. */
    private static String nullToEmpty(String s) {
        return s == null ? "" : s;
    }
}
Kpi_IPCount:
/**
 * MapReduce job that counts how many log lines each client IP produced.
 *
 * <p>The mapper emits (ip, bean with count 1) per accepted line; the reducer, which
 * is also registered as the combiner, sums the counts per ip. The reducer writes the
 * ip as the output key and a bean whose ip is the empty string as the value.
 */
public class Kpi_IPCount {

    /** Input path used when no path is given on the command line. */
    private static final String DEFAULT_INPUT =
            "hdfs://119.29.174.43:9000/user/hadoop/miqiLog10000Input";

    /** Output path used when no path is given on the command line. */
    private static final String DEFAULT_OUTPUT =
            "hdfs://119.29.174.43:9000/user/hadoop/miqiLog10000Output";

    /**
     * Emits one (ip, count = 1) pair for every accepted log line.
     */
    public static class IntSumMapper extends Mapper<Object, Text, Text, Kpi_IPCountBean> {

        // Both objects are reused across map() calls instead of being allocated per record.
        private Kpi_IPCountBean bean = new Kpi_IPCountBean();
        private Text word = new Text();

        /**
         * Extracts the leading space-delimited token of the line as the ip and emits it
         * with a count of 1. Lines containing a backslash, and lines that yield no ip,
         * are skipped.
         */
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String raw = value.toString();
            if (raw.indexOf("\\") != -1) {
                return;
            }
            // filterLog is defined outside this file; the original author's note says it
            // filters out unsuccessful requests -- TODO confirm what it returns for those.
            String line = StringHandleUtils.filterLog(raw);
            if (line == null) {
                return;
            }
            String ip = firstToken(line);
            if (ip.length() == 0) {
                // Blank or whitespace-led line: there is no ip to count.
                return;
            }
            word.set(ip);
            bean.setIpCount(ip, 1); // each occurrence of an ip counts once
            context.write(word, bean);
        }

        /** Returns the text before the first space, or the whole line if it has none. */
        private static String firstToken(String line) {
            int space = line.indexOf(' ');
            return space < 0 ? line : line.substring(0, space);
        }
    }

    /**
     * Sums the counts of all beans that share an ip. Used as both combiner and reducer.
     */
    public static class IntSumReducer extends Reducer<Text, Kpi_IPCountBean, Text, Kpi_IPCountBean> {

        private Kpi_IPCountBean bean = new Kpi_IPCountBean();

        /**
         * Writes the ip key together with a bean holding the summed count.
         */
        public void reduce(Text key, Iterable<Kpi_IPCountBean> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (Kpi_IPCountBean val : values) {
                sum += val.getIpcount();
            }
            // The ip is already the output key, so the bean's own ip is left empty.
            bean.setIpCount("", sum);
            context.write(key, bean);
        }
    }

    /**
     * Configures and runs the job, exiting with 0 on success and 1 on failure.
     *
     * @param args optional: {@code args[0]} is the input path and {@code args[1]} the
     *             output path; each falls back to the built-in default when absent
     */
    public static void main(String[] args) throws Exception {
        String input = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        String output = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;

        Configuration conf = new Configuration();
        Job job = new Job(conf, "ip count");
        job.setJarByClass(Kpi_IPCount.class);
        job.setMapperClass(IntSumMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Kpi_IPCountBean.class);
        FileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
生成的统计结果如下(一小部分):
1.204.61.138 3
1.68.16.195 5
1.68.21.22 3
1.81.91.166 5
101.226.102.237 3
101.226.102.52 2
101.226.102.59 2
101.226.102.78 2
101.226.102.80 9
101.226.102.89 10
101.226.102.94 13
101.226.103.69 4
101.226.114.166 5
101.226.125.108 6
101.226.125.109 3
101.226.125.113 4
101.226.125.114 5
101.226.125.115 1
101.226.125.116 2
(3)将以上的次数从大到小排序:
/**
 * MapReduce job that re-reads tab-separated "ip, count" lines and writes them back
 * ordered by {@link Kpi_IPCountBean}'s natural ordering.
 *
 * <p>The bean is used as the map output key so that the framework's sort on keys does
 * the ordering; the reducer only writes the keys back out.
 */
public class Kpi_IPCountSort {

    /** Input path used when no path is given on the command line. */
    private static final String DEFAULT_INPUT =
            "hdfs://119.29.174.43:9000/user/hadoop/kpi_ip_log_Input";

    /** Output path used when no path is given on the command line. */
    private static final String DEFAULT_OUTPUT =
            "hdfs://119.29.174.43:9000/user/hadoop/kpi_ip_log_Output";

    /**
     * Parses each line into a bean and emits it as the key with no value.
     */
    public static class SortMapper extends Mapper<Object, Text, Kpi_IPCountBean, NullWritable> {

        // Reused across map() calls instead of being allocated per record.
        private Kpi_IPCountBean bean = new Kpi_IPCountBean();

        /**
         * Splits the line on tabs, takes the first column as the ip and the first
         * non-empty later column as the count. Lines containing a backslash are
         * skipped; lines without a parseable count are skipped and counted in the
         * "MALFORMED_LINES" job counter rather than failing the task.
         */
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            if (line.indexOf("\\") != -1) {
                return;
            }
            String[] fields = line.split("\t");
            String rawCount = firstNonEmptyAfterIp(fields);
            if (rawCount == null) {
                context.getCounter("Kpi_IPCountSort", "MALFORMED_LINES").increment(1);
                return;
            }
            int count;
            try {
                count = Integer.parseInt(rawCount);
            } catch (NumberFormatException e) {
                context.getCounter("Kpi_IPCountSort", "MALFORMED_LINES").increment(1);
                return;
            }
            bean.setIpCount(fields[0], count);
            context.write(bean, NullWritable.get());
        }

        /**
         * Returns the first non-blank column after column 0, trimmed, or null if none.
         * Skipping blank columns lets a line with two consecutive tabs between ip and
         * count be read the same way as a line with one.
         */
        private static String firstNonEmptyAfterIp(String[] fields) {
            for (int i = 1; i < fields.length; i++) {
                String candidate = fields[i].trim();
                if (candidate.length() > 0) {
                    return candidate;
                }
            }
            return null;
        }
    }

    /**
     * Writes every sorted bean back out, prefixed by an empty text key.
     */
    public static class SortSumReducer extends Reducer<Kpi_IPCountBean, NullWritable, Text, Kpi_IPCountBean> {

        // Intentionally left empty: the bean's toString() already prints the ip.
        private Text word = new Text();

        /**
         * Emits one output record per input record, so beans that compare equal and
         * therefore arrive in the same group are not collapsed into a single line.
         */
        public void reduce(Kpi_IPCountBean bean, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            for (NullWritable ignored : values) {
                context.write(word, bean);
            }
        }
    }

    /**
     * Configures and runs the job, exiting with 0 on success and 1 on failure.
     *
     * @param args optional: {@code args[0]} is the input path and {@code args[1]} the
     *             output path; each falls back to the built-in default when absent
     */
    public static void main(String[] args) throws Exception {
        String input = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        String output = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;

        Configuration conf = new Configuration();
        Job job = new Job(conf, "ip count sort");
        job.setJarByClass(Kpi_IPCountSort.class);
        job.setMapperClass(SortMapper.class);
        job.setMapOutputKeyClass(Kpi_IPCountBean.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setReducerClass(SortSumReducer.class);
        // NOTE(review): a single reducer is requested so all keys pass through one
        // sorted stream; with several reducers each output file would only be sorted
        // within itself -- confirm against the cluster's configured default.
        job.setNumReduceTasks(1);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Kpi_IPCountBean.class);
        FileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
生成的结果如下(一小部分):
183.39.91.37 2209
218.18.79.26 616
183.39.91.88 444
123.235.55.196 299
14.154.168.200 197
113.89.233.207 182
119.122.8.43 98
14.25.253.183 91
113.116.25.164 83
121.35.224.57 79
117.136.79.174 79
39.158.207.242 74
112.224.67.189 71
112.97.57.234 64
112.97.63.15 61
222.210.39.217 60
101.232.163.249 60
112.97.61.183 59
112.224.69.37 55
61.52.49.155 55