原文链接:http://blog.fens.me/hadoop-mapreduce-log-kpi/
数据来源:网站访问记录
先上代码:
对原作者的代码进行了部分更改以适应新版本的Hadoop,同时记录下自己遇到的问题和查找到的答案,方便以后快速回忆。
package org.apache.hadoop.examples;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
public class Kpi {//bean,将数据封装,转化为String
private String remote_add;
private String remote_user;
private String time_local;
private String request;
private String statues;
private String body_bytes_sent;
private String http_referer;
private String http_user_agent;
private boolean valid = true;
/**
 * Renders this record as a multi-line {@code label:value} listing.
 *
 * <p>The {@code valid} flag is printed first; every following field begins on
 * its own line (separated by {@code '\n'}) and is labelled with the name of the
 * field it shows. A field that is {@code null} is printed as the text
 * {@code "null"}.
 *
 * @return the formatted listing of all fields
 */
@Override
public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append("valid:").append(this.valid);
    // Label had a stray ':' inside it ("remote:_addr:").
    sb.append("\nremote_addr:").append(this.remote_add);
    sb.append("\nremote_user:").append(this.remote_user);
    sb.append("\ntime_local:").append(this.time_local);
    // Was "\request:", which Java reads as a carriage return followed by
    // "equest:"; the intended separator is a newline before "request:".
    sb.append("\nrequest:").append(this.request);
    sb.append("\nstatues:").append(this.statues);
    // Label now matches the field being printed (was "body_statues").
    sb.append("\nbody_bytes_sent:").append(this.body_bytes_sent);
    sb.append("\nhttp_referer:").append(this.http_referer);
    sb.append("\nhttp_user_agent:").append(this.http_user_agent);
    return sb.toString();
}
/**
 * Returns the value currently held in the {@code remote_add} field.
 *
 * @return the stored {@code remote_add} value
 */
public String getRemote_add()
{
    return this.remote_add;
}
/**
 * Stores the given value in the {@code remote_add} field.
 *
 * @param remote_add the new value; stored as-is without validation
 */
public void setRemote_add(String remote_add)
{
    this.remote_add = remote_add;
}
/**
 * Returns the value currently held in the {@code remote_user} field.
 *
 * @return the stored {@code remote_user} value
 */
public String getRemote_user()
{
    return this.remote_user;
}
/**
 * Stores the given value in the {@code remote_user} field.
 *
 * @param remote_user the new value; stored as-is without validation
 */
public void setRemote_user(String remote_user)
{
    this.remote_user = remote_user;
}
/**
 * Returns the value currently held in the {@code time_local} field.
 *
 * @return the stored {@code time_local} value, exactly as it was set
 */
public String getTime_local()
{
    return this.time_local;
}
/**
 * Stores the given value in the {@code time_local} field.
 *
 * @param time_local the new value; kept as a raw string, no parsing is done here
 */
public void setTime_local(String time_local)
{
    this.time_local = time_local;
}
/**
 * Returns the value currently held in the {@code request} field.
 *
 * @return the stored {@code request} value
 */
public String getRequest()
{
    return this.request;
}
/**
 * Stores the given value in the {@code request} field.
 *
 * @param request the new value; stored as-is without validation
 */
public void setRequest(String request)
{
    this.request = request;
}
/**
 * Returns the value currently held in the {@code statues} field.
 *
 * @return the stored {@code statues} value
 */
public String getStatues()
{
    return this.statues;
}
public void setStatues(String statues) {
this.sta