maven3.5+hadoop2.7.3统计米骑测试日志KPI指标(二)

本文介绍了一种使用MapReduce技术统计网站IP访问次数的方法,并通过示例日志展示了如何实现IP地址及其访问次数的统计与排序。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

前面粗略统计了部分接口的PV,下面统计各个IP的访问次数,并按次数从大到小排序输出。


仍以之前那份包含1万条记录、大小约2.5M的日志作为统计样本。

(1)这是部分日志:

183.136.190.40 - - [18/Mar/2017:03:56:58 +0800] "GET / HTTP/1.1" 502 574 "-" "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36"
183.39.91.88 - - [18/Mar/2017:11:06:25 +0800] "GET / HTTP/1.1" 200 964 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
183.39.91.88 - - [18/Mar/2017:11:06:25 +0800] "GET /miqizhuye/css/poposlides.css HTTP/1.1" 200 1855 "http://misbike.com/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
183.39.91.88 - - [18/Mar/2017:11:06:25 +0800] "GET /miqizhuye/js/jquery-1.8.3.min.js HTTP/1.1" 200 37522 "http://misbike.com/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
183.39.91.88 - - [18/Mar/2017:11:06:25 +0800] "GET /miqizhuye/js/poposlides.js HTTP/1.1" 200 1544 "http://misbike.com/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
183.39.91.88 - - [18/Mar/2017:11:06:26 +0800] "GET /miqizhuye/images/686474292697824904.jpg HTTP/1.1" 200 178452 "http://misbike.com/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
183.39.91.88 - - [18/Mar/2017:11:06:26 +0800] "GET /miqizhuye/images/56791768461410450.jpg HTTP/1.1" 200 193835 "http://misbike.com/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
183.39.91.88 - - [18/Mar/2017:11:06:26 +0800] "GET /miqizhuye/images/574881576306843042.jpg HTTP/1.1" 200 226224 "http://misbike.com/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
183.39.91.88 - - [18/Mar/2017:11:06:26 +0800] "GET /miqizhuye/images/308882832965651915.jpg HTTP/1.1" 200 272736 "http://misbike.com/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
183.39.91.88 - - [18/Mar/2017:11:06:26 +0800] "GET /miqizhuye/images/637465093667333887.jpg HTTP/1.1" 200 357482 "http://misbike.com/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"

(2)统计ip和ip次数:

public class Kpi_IPCountBean implements WritableComparable<Kpi_IPCountBean>{
	private String ip;
	private Integer ipcount;
	public String getIp() {
		return ip;
	}
	public void setIp(String ip) {
		this.ip = ip;
	}
	public Integer getIpcount() {
		return ipcount;
	}
	public void setIpcount(Integer ipcount) {
		this.ipcount = ipcount;
	}
	@Override
	public String toString() {
		return this.ip + "\t" + this.ipcount;
	}
	
	public void setIpCount(String ip, Integer ipcount){
		this.ip = ip;
		this.ipcount = ipcount;
	}
	
	/**
	 * serialize
	 */
	public void write(DataOutput out) throws IOException {
		out.writeUTF(ip);
		out.writeInt(ipcount);
		
	}

	/**
	 * deserialize
	 */
	public void readFields(DataInput in) throws IOException {
		this.ip = in.readUTF();
		this.ipcount = in.readInt();
		
	}
	

	public int compareTo(Kpi_IPCountBean o) {
		if(this.ipcount > o.getIpcount()){
			return -1; 
		} else {
			return 1;
		}
	}
}

Kpi_IPCount:

public class Kpi_IPCount {
	public static class IntSumMapper extends Mapper<Object, Text, Text, Kpi_IPCountBean> {
		private Kpi_IPCountBean bean = new Kpi_IPCountBean();
		private Text word = new Text();

		public void map(Object key, Text value, Context context) throws IOException, InterruptedException {			
			if(value.toString().indexOf("\\")==-1){				
				//过滤不成功的请求
    			String line = StringHandleUtils.filterLog(value.toString());
    			String[] fields = line.split(" ");    			
    			String ip = fields[0];
                word.set(ip);
                Integer count = 1;  //每次ip出现,次数为1                
                bean.setIpCount(ip, count);
                context.write(word, bean);
			}
			
		}
	}

	public static class IntSumReducer extends Reducer<Text, Kpi_IPCountBean, Text, Kpi_IPCountBean> {
		private Kpi_IPCountBean bean = new Kpi_IPCountBean();

		public void reduce(Text key, Iterable<Kpi_IPCountBean> values, Context context)
				throws IOException, InterruptedException {
			int sum = 0;
			for (Kpi_IPCountBean val : values) {
				sum += val.getIpcount();
			}
			bean.setIpCount("", sum);
			context.write(key, bean);
		}
	}

	public static void main(String[] args) throws Exception {		
		Configuration conf = new Configuration();
		
		Job job = new Job(conf, "ip count");
		job.setJarByClass(Kpi_IPCount.class);
		job.setMapperClass(IntSumMapper.class);
		job.setCombinerClass(IntSumReducer.class);
		job.setReducerClass(IntSumReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Kpi_IPCountBean.class);
		
		FileInputFormat.addInputPath(job, new Path("hdfs://119.29.174.43:9000/user/hadoop/miqiLog10000Input"));
		FileOutputFormat.setOutputPath(job, new Path("hdfs://119.29.174.43:9000/user/hadoop/miqiLog10000Output"));
		
			
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}

生成的统计结果如下(一小部分):
1.204.61.138		3
1.68.16.195		5
1.68.21.22		3
1.81.91.166		5
101.226.102.237		3
101.226.102.52		2
101.226.102.59		2
101.226.102.78		2
101.226.102.80		9
101.226.102.89		10
101.226.102.94		13
101.226.103.69		4
101.226.114.166		5
101.226.125.108		6
101.226.125.109		3
101.226.125.113		4
101.226.125.114		5
101.226.125.115		1
101.226.125.116		2

(3)将以上的次数从大到小排序:

public class Kpi_IPCountSort {
	public static class SortMapper extends Mapper<Object, Text, Kpi_IPCountBean, NullWritable> {
		private Kpi_IPCountBean bean = new Kpi_IPCountBean();

		public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
			
			if(value.toString().indexOf("\\")==-1){
				// split 
    			String line = value.toString();
    			String[] fields = line.split("\t");    			
    			String ip = fields[0];
                Integer count = Integer.parseInt(fields[1]);
                bean.setIpCount(ip, count);
                context.write(bean, NullWritable.get());
			}
			
		}
	}

	public static class SortSumReducer extends Reducer<Kpi_IPCountBean, NullWritable, Text, Kpi_IPCountBean> {
		private Text word = new Text();
		public void reduce(Kpi_IPCountBean bean, Iterable<NullWritable> values, Context context)
				throws IOException, InterruptedException {			
			String ip = bean.getIp();
			//word.set(ip);
			context.write(word, bean);
		}
	}

	public static void main(String[] args) throws Exception {		
		Configuration conf = new Configuration();

		Job job = new Job(conf, "ip count sort");
		job.setJarByClass(Kpi_IPCountSort.class);
		job.setMapperClass(SortMapper.class);
		job.setMapOutputKeyClass(Kpi_IPCountBean.class);
		job.setMapOutputValueClass(NullWritable.class);
		//job.setCombinerClass(SortSumReducer.class);
		
		job.setReducerClass(SortSumReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Kpi_IPCountBean.class);		
		
		FileInputFormat.addInputPath(job, new Path("hdfs://119.29.174.43:9000/user/hadoop/kpi_ip_log_Input"));
		FileOutputFormat.setOutputPath(job, new Path("hdfs://119.29.174.43:9000/user/hadoop/kpi_ip_log_Output"));
					
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}

生成的结果如下(一小部分):

	183.39.91.37	2209
	218.18.79.26	616
	183.39.91.88	444
	123.235.55.196	299
	14.154.168.200	197
	113.89.233.207	182
	119.122.8.43	98
	14.25.253.183	91
	113.116.25.164	83
	121.35.224.57	79
	117.136.79.174	79
	39.158.207.242	74
	112.224.67.189	71
	112.97.57.234	64
	112.97.63.15	61
	222.210.39.217	60
	101.232.163.249	60
	112.97.61.183	59
	112.224.69.37	55
	61.52.49.155	55



评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值