最近有一个需求:统计网站在世界各大洲的访问量情况。由于我们的访问记录是写入 MongoDB 日志的,所以在统计过程中我用到了 MongoDB 的 MapReduce、淘宝的 IP 查询接口以及 Spring 的定时任务。下面贴出实现的过程:
首先,日志结构:
{
"_id" : ObjectId("5a3a02edcf7eef3d57a83bbb"),
"log_event" : "visit",
"access_site" : "115.68.28.11:8080",
"access_time" : ISODate("2017-12-20T01:48:06Z"),
"access_path" : "/index.php/forum/",
"resource_id" : "",
"owner_id" : "",
"user_agent" : "WEB",
"user_host" : "59.39.145.178",
"status" : 200,
"access_user_id" : "",
"referer_url" : ""
}
MongoDB 的 MapReduce 包括两个 JS 方法:
首先map.js:
function(){
var date_1 = new Date(this.access_time);
var yearValue=date_1.getFullYear();
var monthValue=date_1.getMonth()+1;
var week_count;
var d_ = new Date();
// 获取每时间是这个月的第几周
d_.setFullYear(yearValue, monthValue-1, 1);
var w1_ = d_.getDay();
if (w1_ == 0) w1_ = 7;
// total day of month
d_.setFullYear(yearValue, monthValue, 0);
var dd_ = d_.getDate();
// first Monday
if (w1_ != 1) d1_ = 7 - w1_ + 2;
else d1_ = 1;
week_count = Math.ceil((dd_-d1_+1)/7);
if(this.log_event=="visit"){
emit(yearValue+"-"+monthValue+"-"+week_count+"-"+this.user_host,1);
}
}
对于 map,我的理解是:它负责拼装 key 和对应的数据(value),同一个 key 可以被多次 emit。再看 reduce:
function(key,values){
var sum = 0;
for (var i = 0; i < values.length; i++)
sum += values[i];
return sum;
}
这样就能得到把所有 key 相同的 value 相加之后的结果集。
例如我的map拿到的数据有:
key:a value:1
key:a value:1
key:b value:1
key:c value:1
key:c value:1
这样我经过reduce函数之后:
key:a value:2
key:b value:1
key:c value:2
然后java的调用:
// MongoDB operations facade, injected by the container via @Resource.
@Resource
private MongoOperations mongoOps;
// Runs the map and reduce scripts (referenced as classpath resources) against the
// "log.access.events" collection and maps each result row onto UserDistributionDTO.
MapReduceResults<UserDistributionDTO> results = mongoOps.mapReduce("log.access.events",
"classpath:static/js/userDistributionMapReduce/userDistribution_map.js",
"classpath:static/js/userDistributionMapReduce/userDistribution_reduce.js",
UserDistributionDTO.class);
贴上UserDistributionDTO
package com.bigdata.lab.ymlib.dto;
/**
 * Plain holder for one key/count pair.
 *
 * <p>NOTE(review): presumably populated from the {@code _id} and {@code value} fields of a
 * MongoDB map-reduce result row; confirm against the mapping configuration.
 *
 * @author xuchuang
 * @updateTime January 3, 2018, 4:19:03 PM
 */
public class UserDistributionDTO {

    /** Identifier of the row; {@code null} until set. */
    private String id;

    /** Count associated with {@link #id}; {@code null} until set. */
    private Integer value;

    /** @return the row identifier, or {@code null} if none was set */
    public String getId() {
        return this.id;
    }

    /** @return the count, or {@code null} if none was set */
    public Integer getValue() {
        return this.value;
    }

    /** @param id the row identifier to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @param value the count to store */
    public void setValue(Integer value) {
        this.value = value;
    }
}
贴上淘宝 IP 查询工具类,以及自己实现的几个工具方法,如下:
package com.bigdata.lab.ymlib.util;
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import net.sf.json.JSONObject;
public class AddressUtils {
// Base URL of the Sina IP lookup endpoint, requesting JSON output; sinaIpAddress appends "&ip=<address>".
private static final String SINA_IP_URL = "http://int.dpool.sina.com.cn/iplookup/iplookup.php?format=json";
/**
 * Looks up the "region" field for an IP address through the Taobao IP service.
 *
 * @param content request body that is POSTed to the service, e.g. {@code ip=1.2.3.4}
 * @param encodingString charset name used to decode the service response
 * @return the region text; an empty string when the response holds no usable region;
 *         {@code null} when the request yielded no response at all
 * @throws UnsupportedEncodingException part of the signature; the code visible here does
 *         not raise it itself
 */
public static String getProvince(String content, String encodingString) throws UnsupportedEncodingException {
    // Taobao IP lookup endpoint.
    String response = getResult("http://ip.taobao.com/service/getIpInfo.php", content, encodingString);
    if (response == null) {
        return null;
    }

    String region = searchValue(response, "region");
    // searchValue may hand back null, an empty string, or a bare pair of quotes for "no value".
    boolean usable = region != null && !"\"\"".equals(region) && !"".equals(region);
    return usable ? region : "";
}
/**
 * Looks up the "region" and "city" fields for an IP address through the Taobao IP service
 * and returns them concatenated (region first, no separator).
 *
 * <p>A "county" lookup and a filter for the city value "内网IP" existed earlier as
 * commented-out code; neither is active.
 *
 * @param content request body that is POSTed to the service, e.g. {@code ip=1.2.3.4}
 * @param encodingString charset name used to decode the service response
 * @return region + city (either part is skipped when unusable), possibly an empty string;
 *         {@code null} when the request yielded no response at all
 * @throws UnsupportedEncodingException part of the signature; the code visible here does
 *         not raise it itself
 */
public static String getAddresses(String content, String encodingString) throws UnsupportedEncodingException {
    // Taobao IP lookup endpoint.
    String response = getResult("http://ip.taobao.com/service/getIpInfo.php", content, encodingString);
    if (response == null) {
        return null;
    }

    StringBuilder address = new StringBuilder();
    for (String field : new String[] {"region", "city"}) {
        String part = searchValue(response, field);
        // Skip null, empty, and the bare-quotes marker that searchValue can return.
        if (part == null || "\"\"".equals(part) || "".equals(part)) {
            continue;
        }
        address.append(part);
    }
    return address.toString();
}
/**
 * POSTs {@code content} to {@code urlStr} and returns the response body as one string.
 *
 * <p>Response lines are concatenated without line separators. Connect and read timeouts are
 * both 5000 ms.
 *
 * @param urlStr target URL
 * @param content form body to send, e.g. {@code name=xxx&pwd=xxx}
 * @param encoding charset name used to decode the response
 * @return the response body, or {@code null} when an {@link IOException} occurred
 */
private static String getResult(String urlStr, String content, String encoding) {
    HttpURLConnection conn = null;
    DataOutputStream bodyOut = null;
    BufferedReader responseIn = null;
    String result = null;
    try {
        conn = (HttpURLConnection) new URL(urlStr).openConnection();
        conn.setConnectTimeout(5000); // milliseconds
        conn.setReadTimeout(5000); // milliseconds
        conn.setDoOutput(true); // we send a request body
        conn.setDoInput(true); // and read a response
        conn.setRequestMethod("POST");
        conn.setUseCaches(false);
        conn.connect();

        // Send the form body and close the stream before reading the reply.
        bodyOut = new DataOutputStream(conn.getOutputStream());
        bodyOut.writeBytes(content);
        bodyOut.flush();
        bodyOut.close();

        // Read the reply line by line, decoding with the requested charset.
        responseIn = new BufferedReader(new InputStreamReader(conn.getInputStream(), encoding));
        StringBuffer collected = new StringBuffer();
        for (String row = responseIn.readLine(); row != null; row = responseIn.readLine()) {
            collected.append(row);
        }
        responseIn.close();
        result = collected.toString();
    } catch (IOException e) {
        System.out.println("根据ip获取地址信息出错" + e.getMessage());
        e.printStackTrace();
    } finally {
        // Close whatever was opened, then drop the connection.
        if (bodyOut != null) {
            try {
                bodyOut.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (responseIn != null) {
            try {
                responseIn.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (conn != null) {
            conn.disconnect();
        }
    }
    return result;
}
/**
 * Decodes backslash escapes in {@code theString}.
 *
 * <p>{@code \}{@code uXXXX} becomes the UTF-16 code unit with that hexadecimal value;
 * {@code \t}, {@code \r}, {@code \n} and {@code \f} become the matching control character;
 * any other escaped character is emitted as-is with the backslash dropped.
 *
 * @param theString text to decode; must not be {@code null}
 * @return the decoded text
 * @throws IllegalArgumentException if an escape is malformed: a non-hex digit inside a
 *         {@code \}{@code u} escape, or an escape cut short by the end of the input
 *         (the latter previously surfaced as a {@code StringIndexOutOfBoundsException})
 * @throws NullPointerException if {@code theString} is {@code null}
 */
public static String decodeUnicode(String theString) {
    int len = theString.length();
    StringBuilder outBuffer = new StringBuilder(len);
    int pos = 0;
    while (pos < len) {
        char aChar = theString.charAt(pos++);
        if (aChar != '\\') {
            outBuffer.append(aChar);
            continue;
        }
        // A backslash must be followed by at least one more character.
        if (pos >= len) {
            throw new IllegalArgumentException("Malformed encoding.");
        }
        aChar = theString.charAt(pos++);
        if (aChar == 'u') {
            // Exactly four hex digits must follow.
            if (pos + 4 > len) {
                throw new IllegalArgumentException("Malformed encoding.");
            }
            int value = 0;
            for (int i = 0; i < 4; i++) {
                value = (value << 4) + hexValue(theString.charAt(pos++));
            }
            outBuffer.append((char) value);
        } else {
            switch (aChar) {
                case 't':
                    aChar = '\t';
                    break;
                case 'r':
                    aChar = '\r';
                    break;
                case 'n':
                    aChar = '\n';
                    break;
                case 'f':
                    aChar = '\f';
                    break;
                default:
                    // Any other escaped character stands for itself.
                    break;
            }
            outBuffer.append(aChar);
        }
    }
    return outBuffer.toString();
}

/** Returns the numeric value of an ASCII hex digit, rejecting anything else. */
private static int hexValue(char digit) {
    if (digit >= '0' && digit <= '9') {
        return digit - '0';
    }
    if (digit >= 'a' && digit <= 'f') {
        return 10 + digit - 'a';
    }
    if (digit >= 'A' && digit <= 'F') {
        return 10 + digit - 'A';
    }
    throw new IllegalArgumentException("Malformed encoding.");
}
/**
 * Extracts the raw value of {@code key} from a JSON-like response string (used for the
 * province / city / county fields).
 *
 * <p>The value is the text between {@code "key":} and the next comma. It is unescaped with
 * {@link #decodeUnicode(String)}; when it is longer than two characters and ends in a
 * closing quote, the first character and everything from the last quote on are removed.
 * Values of two characters or fewer (for example a bare pair of quotes) are returned
 * unchanged, so callers must still treat {@code ""} with quotes as "empty".
 *
 * <p>Unquoted values longer than two characters (such as {@code null}, {@code true} or a
 * number) are returned unchanged; previously they caused a
 * {@code StringIndexOutOfBoundsException}.
 *
 * <p>NOTE(review): a value that is the last member of its object (followed by a closing
 * brace instead of a comma) or that contains a comma is not extracted correctly; this
 * limitation is unchanged.
 *
 * @param remoteIpInfo response text to search
 * @param key field name to look for
 * @return the extracted value; an empty string when {@code remoteIpInfo} is blank;
 *         {@code null} when the key/comma pair is not found
 */
public static String searchValue(String remoteIpInfo, String key) {
    if (!org.apache.commons.lang.StringUtils.isNotBlank(remoteIpInfo)) {
        return "";
    }
    String raw = org.apache.commons.lang.StringUtils.substringBetween(remoteIpInfo, "\"" + key + "\":", ",");
    if (!org.apache.commons.lang.StringUtils.isNotBlank(raw)) {
        // Not found (null) or blank: hand it back untouched, as before.
        return raw;
    }
    String decoded = decodeUnicode(raw);
    if (decoded.length() > 2) {
        int closingQuote = decoded.lastIndexOf("\"");
        // Only strip when there is a quote to strip up to; otherwise the value is
        // unquoted and substring(1, -1) / substring(1, 0) would throw.
        if (closingQuote >= 1) {
            decoded = decoded.substring(1, closingQuote);
        }
    }
    return decoded;
}
/**
 * Resolves an IP address to "region + city" text via {@code getAddresses}.
 *
 * <p>NOTE(review): the pattern {@code 192.9.*+} has unescaped dots, so it matches any
 * string that starts with "192", then any one character, then "9" (for example
 * "192.99.1.1"), not only a literal "192.9." prefix. Confirm which range was intended.
 *
 * @param ip address to resolve; may be {@code null}
 * @return the placeholder "测试地址" for a {@code null} ip, for "127.0.0.1", or for an ip
 *         matching the pattern above; otherwise whatever {@code getAddresses} returns
 *         (which may be {@code null})
 */
public static String ipConvertAddress(String ip) {
    final String placeholder = "测试地址";

    boolean skipLookup = ip == null || "127.0.0.1".equals(ip) || ip.matches("192.9.*+");
    if (skipLookup) {
        return placeholder;
    }

    try {
        return getAddresses("ip=" + ip, "utf-8");
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
        return placeholder;
    }
}
/**
 * Resolves an IP address to its region (province) text via {@code getProvince}.
 *
 * <p>NOTE(review): the pattern {@code 192.9.*+} has unescaped dots, so it matches any
 * string that starts with "192", then any one character, then "9", not only a literal
 * "192.9." prefix. Confirm which range was intended.
 *
 * @param ip address to resolve; may be {@code null}
 * @return the placeholder "测试地址" for a {@code null} ip, for "127.0.0.1", or for an ip
 *         matching the pattern above; otherwise whatever {@code getProvince} returns
 *         (which may be {@code null})
 */
public static String ipConvertProvince(String ip) {
    String fallback = "测试地址";

    if (ip == null) {
        return fallback;
    }
    if ("127.0.0.1".equals(ip) || ip.matches("192.9.*+")) {
        return fallback;
    }

    String province = fallback;
    try {
        province = getProvince("ip=" + ip, "utf-8");
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
    }
    return province;
}
/**
 * Resolves an IP address through the Sina IP lookup service.
 *
 * <p>The HTTP client and response are now closed in a {@code finally} block; the original
 * never closed either, leaking them on every call.
 *
 * <p>NOTE(review): the pattern {@code 192.9.*+} has unescaped dots and therefore matches
 * more than a literal "192.9." prefix; {@code ip} is also appended to the URL without
 * encoding. Both are unchanged.
 *
 * @param ip address to resolve; may be {@code null}
 * @param field which part to return: {@code "province"}, {@code "city"},
 *        {@code "country"}, {@code "all"} (country + province + city); any other value
 *        yields province + city
 * @return {@code null} for a {@code null} ip, for "127.0.0.1", or for an ip matching the
 *         pattern above; otherwise the requested text, or an empty string when the
 *         request or the parsing failed
 */
public static String sinaIpAddress(String ip, String field) {
    if (null == ip || "127.0.0.1".equals(ip) || ip.matches("192.9.*+")) {
        return null;
    }
    HttpGet httpGet = new HttpGet(SINA_IP_URL + "&ip=" + ip);
    StringBuilder st = new StringBuilder("");
    CloseableHttpClient client = null;
    CloseableHttpResponse response = null;
    try {
        httpGet.setHeader(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_UTF8_VALUE);
        client = HttpClients.createDefault();
        response = client.execute(httpGet);
        String respContent = EntityUtils.toString(response.getEntity(), "UTF-8");
        JSONObject respObject = JSONObject.fromObject(respContent);
        if ("province".equals(field)) {
            st.append(respObject.get("province"));
        } else if ("city".equals(field)) {
            st.append(respObject.get("city"));
        } else if ("country".equals(field)) {
            st.append(respObject.get("country"));
        } else if ("all".equals(field)) {
            st.append(respObject.get("country")).append(respObject.get("province")).append(respObject.get("city"));
        } else {
            st.append(respObject.get("province")).append(respObject.get("city"));
        }
    } catch (Exception e) {
        // Best effort: any failure (network, non-JSON body, ...) yields an empty result.
        System.out.println("新浪ip接口解析地址失败");
        e.printStackTrace();
    } finally {
        // Release the response first, then the client that produced it.
        if (response != null) {
            try {
                response.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (client != null) {
            try {
                client.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return st.toString();
}
/**
 * Country name (Chinese) to continent name (Chinese), built once at class load.
 * When a name is registered twice, the first registration wins.
 */
private static final Map<String, String> CONTINENT_BY_COUNTRY = buildContinentIndex();

/**
 * Builds the country-to-continent index.
 *
 * <p>Compared with the original inline lists: entries that had been fused into one string
 * literal are split ("丹麦" / "法罗群岛(丹)", "黑塞哥维那" / "意大利"), short aliases are
 * added next to names that carried a parenthetical alias, the duplicated tail of the Africa
 * list is dropped, and the southern African countries are added.
 *
 * <p>NOTE(review): the lists are still not exhaustive (for example 尼日利亚 and 黑山 are
 * absent); extend them to match the names the IP service actually returns.
 */
private static Map<String, String> buildContinentIndex() {
    Map<String, String> index = new HashMap<String, String>();
    addCountries(index, "亚洲",
        "中国", "台湾", "蒙古", "朝鲜", "韩国", "日本", "菲律宾", "越南", "老挝", "柬埔寨", "缅甸", "泰国", "马来西亚", "文莱", "新加坡", "印度尼西亚",
        "东帝汶", "尼泊尔", "不丹", "孟加拉国", "印度", "巴基斯坦", "斯里兰卡", "马尔代夫", "哈萨克斯坦", "吉尔吉斯斯坦", "塔吉克斯坦", "乌兹别克斯坦",
        "土库曼斯坦", "阿富汗", "伊拉克", "伊朗", "叙利亚", "约旦", "黎巴嫩", "以色列", "巴勒斯坦", "沙特阿拉伯", "巴林", "卡塔尔", "科威特",
        "阿拉伯联合酋长国(阿联酋)", "阿拉伯联合酋长国", "阿联酋",
        "阿曼", "也门", "格鲁吉亚", "亚美尼亚", "阿塞拜疆", "土耳其", "塞浦路斯");
    addCountries(index, "欧洲",
        "芬兰", "瑞典", "挪威", "冰岛", "丹麦", "法罗群岛(丹)", "爱沙尼亚", "拉脱维亚", "立陶宛", "白俄罗斯", "俄罗斯", "乌克兰", "摩尔多瓦", "波兰", "捷克", "斯洛伐克", "匈牙利", "德国", "奥地利", "瑞士", "列支敦士登",
        "英国", "爱尔兰", "荷兰", "比利时", "卢森堡", "法国", "摩纳哥", "罗马尼亚", "保加利亚", "塞尔维亚", "马其顿", "阿尔巴尼亚", "希腊", "斯洛文尼亚", "克罗地亚", "波斯尼亚", "黑塞哥维那", "意大利", "梵蒂冈", "圣马力诺",
        "马耳他", "西班牙", "葡萄牙", "安道尔");
    addCountries(index, "北美洲",
        "加拿大", "美国", "墨西哥", "格陵兰(丹)", "危地马拉", "伯利兹", "萨尔瓦多", "洪都拉斯", "尼加拉瓜", "哥斯达黎加", "巴拿马", "巴哈马", "古巴", "牙买加", "海地", "多米尼加共和国", "安提瓜和巴布达", "圣基茨和尼维斯",
        "多米尼克", "圣卢西亚", "圣文森特和格林纳丁斯", "格林纳达", "巴巴多斯", "特立尼达和多巴哥", "波多黎各(美)", "英属维尔京群岛", "美属维尔京群岛", "安圭拉(英)", "蒙特塞拉特(英)", "瓜德罗普(法)", "马提尼克(法)",
        "荷属安的列斯", "阿鲁巴(荷)", "特克斯和凯科斯群岛(英)", "开曼群岛(英)", "百慕大(英)");
    addCountries(index, "南美洲",
        "哥伦比亚", "委内瑞拉", "圭亚那", "法属圭亚那", "苏里南", "厄瓜多尔", "秘鲁", "玻利维亚", "巴西", "智利", "阿根廷", "乌拉圭", "巴拉圭");
    addCountries(index, "大洋洲",
        "澳大利亚", "新西兰", "巴布亚新几内亚", "所罗门群岛", "瓦努阿图", "密克罗尼西亚", "马绍尔群岛", "帕劳", "瑙鲁", "基里巴斯", "图瓦卢", "萨摩亚", "斐济群岛", "汤加", "库克群岛(新)", "关岛(美)", "新喀里多尼亚(法)",
        "法属波利尼西亚", "皮特凯恩岛(英)", "瓦利斯与富图纳(法)", "纽埃(新)", "托克劳(新)", "美属萨摩亚", "北马里亚纳(美)");
    addCountries(index, "非洲",
        "埃及", "利比亚", "苏丹", "突尼斯", "阿尔及利亚", "摩洛哥", "亚速尔群岛(葡)", "马德拉群岛(葡)", "埃塞俄比亚", "厄立特里亚", "索马里", "吉布提", "肯尼亚", "坦桑尼亚", "乌干达", "卢旺达", "布隆迪", "塞舌尔",
        "乍得", "中非", "喀麦隆", "赤道几内亚", "加蓬",
        "刚果共和国(即:刚果(布))", "刚果共和国", "刚果(布)",
        "刚果民主共和国(即:刚果(金))", "刚果民主共和国", "刚果(金)",
        "圣多美及普林西比", "毛里塔尼亚", "西撒哈拉", "塞内加尔", "冈比亚", "马里", "布基纳法索", "几内亚",
        "几内亚比绍", "佛得角", "塞拉利昂", "利比里亚", "科特迪瓦", "加纳", "多哥", "贝宁", "尼日尔", "加那利群岛(西)",
        "赞比亚", "安哥拉", "津巴布韦", "马拉维", "莫桑比克", "博茨瓦纳", "纳米比亚", "南非", "斯威士兰", "莱索托", "马达加斯加", "科摩罗", "毛里求斯", "留尼旺(法)", "圣赫勒拿(英)");
    return index;
}

/** Registers each country under {@code continent} unless it is already registered. */
private static void addCountries(Map<String, String> index, String continent, String... countries) {
    for (String country : countries) {
        if (!index.containsKey(country)) {
            index.put(country, continent);
        }
    }
}

/**
 * Maps a country name (Chinese) to the name of its continent (Chinese).
 *
 * @param contryName country name as returned by the IP service; may be {@code null}
 * @return one of "亚洲", "欧洲", "北美洲", "南美洲", "大洋洲", "非洲", or an empty string when
 *         the name is {@code null} or not in the index
 */
public static String getContryZhou(String contryName) {
    String zhouName = CONTINENT_BY_COUNTRY.get(contryName);
    return zhouName == null ? "" : zhouName;
}
/**
 * Resolves an IP address to the name of its continent.
 *
 * <p>Queries the Taobao IP service and, while the response is empty, retries up to three
 * times with a two-second pause before each retry. The "country" field of the response is
 * then mapped with {@code getContryZhou}.
 *
 * <p>If the thread is interrupted while pausing, the interrupt flag is restored and
 * retrying stops (the original swallowed the interrupt and kept retrying).
 *
 * @param ip address to resolve
 * @return the continent name, or an empty string when the country is unknown or no
 *         response could be obtained
 */
public static String getIpZhou(String ip) {
    String urlStr = "http://ip.taobao.com/service/getIpInfo.php";
    String result_str = getResult(urlStr, "ip=" + ip, "utf-8");
    System.out.println("result_str:" + result_str);
    int count_retry = 0;
    while (count_retry < 3 && StringUtils.isEmpty(result_str)) {
        try {
            Thread.sleep(2000);
        } catch (InterruptedException e) {
            // Preserve the interrupt for the caller and give up instead of blocking further.
            Thread.currentThread().interrupt();
            break;
        }
        count_retry++;
        System.out.println("第" + count_retry + "次重试获取ip详情:" + "ip为:" + ip);
        result_str = getResult(urlStr, "ip=" + ip, "utf-8");
    }
    String country = searchValue(result_str, "country");
    return getContryZhou(country);
}
/**
 * Scratch entry point: shows that a second {@code put} with the same key replaces the
 * first value, printing {@code 3}.
 *
 * <p>Earlier manual experiments (continent lookup for a sample IP, address lookup through
 * the Taobao and Sina services, building a quoted country list) used to live here as
 * commented-out code.
 *
 * @param args unused
 */
public static void main(String[] args) {
    Map<String, Integer> counts = new HashMap<String, Integer>();
    counts.put("a", 2);
    counts.put("a", 3); // same key: overwrites the 2
    Integer latest = counts.get("a");
    System.out.println(latest);
}
}