record-mr-hive

LogEntity


public class LogEntity implements Writable {
    // Regex handed to String.split(); matches the literal separator "|@@|".
    public final static String delimiter = "\\|@@\\|";

    private String remote_addr;     // client IP address
    private String time_local;      // access timestamp (normalized by LogParser)
    private String request;         // request line: URL and HTTP protocol
    private String exetime;         // request execution time
    private String status;          // HTTP status code; 200 means success
    private String body_bytes_sent; // bytes of response body sent to the client
    private String http_referer;    // page the client was referred from
    private String http_user_agent; // client browser / user-agent string
    private String req_body;        // request body
    private boolean valid = true;   // whether this record passed validation

    public String getRemote_addr() {
        return remote_addr;
    }

    public void setRemote_addr(String remote_addr) {
        this.remote_addr = remote_addr;
    }

    public String getTime_local() {
        return time_local;
    }

    public void setTime_local(String time_local) {
        this.time_local = time_local;
    }

    public String getRequest() {
        return request;
    }

    public void setRequest(String request) {
        this.request = request;
    }

    public String getExetime() {
        return exetime;
    }

    public void setExetime(String exetime) {
        this.exetime = exetime;
    }

    public String getStatus() {
        return status;
    }

    public void setStatus(String status) {
        this.status = status;
    }

    public String getBody_bytes_sent() {
        return body_bytes_sent;
    }

    public void setBody_bytes_sent(String body_bytes_sent) {
        this.body_bytes_sent = body_bytes_sent;
    }

    public String getHttp_referer() {
        return http_referer;
    }

    public void setHttp_referer(String http_referer) {
        this.http_referer = http_referer;
    }

    public String getHttp_user_agent() {
        return http_user_agent;
    }

    public void setHttp_user_agent(String http_user_agent) {
        this.http_user_agent = http_user_agent;
    }

    public String getReq_body() {
        return req_body;
    }

    public void setReq_body(String req_body) {
        this.req_body = req_body;
    }

    public boolean isValid() {
        return valid;
    }

    public void setValid(boolean valid) {
        this.valid = valid;
    }

    /**
     * Serializes the record as a \001 (Ctrl-A) delimited line — the field
     * separator the downstream Hive tables declare. Null fields render as
     * the string "null", same as StringBuilder.append(null) would.
     */
    @Override
    public String toString() {
        return String.join("\001",
                remote_addr, time_local, exetime, request, status,
                body_bytes_sent, http_referer, http_user_agent, req_body);
    }

    // Hadoop Writable deserialization; field order must mirror write().
    @Override
    public void readFields(DataInput in) throws IOException {
        remote_addr = in.readUTF();
        time_local = in.readUTF();
        exetime = in.readUTF();
        request = in.readUTF();
        status = in.readUTF();
        body_bytes_sent = in.readUTF();
        http_referer = in.readUTF();
        http_user_agent = in.readUTF();
        req_body = in.readUTF();
    }

    // Hadoop Writable serialization; field order must mirror readFields().
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(remote_addr);
        out.writeUTF(time_local);
        out.writeUTF(exetime);
        out.writeUTF(request);
        out.writeUTF(status);
        out.writeUTF(body_bytes_sent);
        out.writeUTF(http_referer);
        out.writeUTF(http_user_agent);
        out.writeUTF(req_body);
    }
}

LogParser


public class LogParser {

    // DateTimeFormatter is immutable and thread-safe, unlike the shared
    // static SimpleDateFormat instances it replaces (SimpleDateFormat is
    // documented as not thread-safe; sharing it across threads corrupts
    // parses silently). Input: nginx access-log time, e.g. "08/Aug/2017:12:34:56".
    private static final java.time.format.DateTimeFormatter INPUT_FORMAT =
            java.time.format.DateTimeFormatter.ofPattern("dd/MMM/yyyy:HH:mm:ss", java.util.Locale.US);

    // Output: the "yyyy-MM-dd HH:mm:ss" form the Hive tables expect.
    private static final java.time.format.DateTimeFormatter OUTPUT_FORMAT =
            java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    /**
     * Splits one raw log line on LogEntity.delimiter and fills a LogEntity.
     * The record is marked invalid when it has fewer than 9 fields, the
     * status is >= 400 (or not numeric), or the request is one of the noise
     * patterns (the open-files API, bare GET/HEAD probes).
     */
    public static LogEntity parser(String line) {
        LogEntity log = new LogEntity();
        // delimiter is a static member: access it via the class, not an instance.
        String[] arr = line.split(LogEntity.delimiter);
        if (arr.length < 9) {
            log.setValid(false);
            return log;
        }
        log.setRemote_addr(arr[0]);
        log.setTime_local(parseTime(arr[1]));
        log.setRequest(arr[2]);
        log.setExetime(arr[3]);
        log.setStatus(arr[4]);
        log.setBody_bytes_sent(arr[5]);
        log.setHttp_referer(arr[6]);
        log.setHttp_user_agent(arr[7]);
        log.setReq_body(arr[8]);

        int status;
        try {
            status = Integer.parseInt(log.getStatus());
        } catch (NumberFormatException e) {
            // Previously this exception escaped and would kill the whole map
            // task on one malformed line; treat it as an invalid record instead.
            log.setValid(false);
            return log;
        }
        if (status >= 400) { // 4xx/5xx: HTTP error
            log.setValid(false);
        } else if (log.getRequest().contains("/api/open/files")
                || log.getRequest().equals("GET / HTTP/1.1")
                || log.getRequest().equals("HEAD / HTTP/1.0")) {
            log.setValid(false);
        }
        return log;
    }

    /**
     * Converts "dd/MMM/yyyy:HH:mm:ss" to "yyyy-MM-dd HH:mm:ss".
     * Returns "" when the input cannot be parsed, matching the original
     * contract (callers treat the empty string as "no timestamp").
     */
    public static String parseTime(String dt) {
        try {
            return java.time.LocalDateTime.parse(dt, INPUT_FORMAT).format(OUTPUT_FORMAT);
        } catch (java.time.format.DateTimeParseException e) {
            e.printStackTrace();
            return "";
        }
    }
}

Task


public class Clear2 {

    /**
     * Parses each raw log line and emits only records that pass validation.
     * The key is NullWritable because downstream only needs the record itself.
     */
    static class ConverterMap extends Mapper<LongWritable, Text, NullWritable, LogEntity> {
        public NullWritable v = NullWritable.get();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            LogEntity log = LogParser.parser(value.toString());
            if (log.isValid()) {
                context.write(v, log);
            }
        }
    }

    /**
     * Routes records by day-of-month of the normalized timestamp.
     * time_local is "yyyy-MM-dd HH:mm:ss", so characters 8-9 are the day.
     * NOTE(review): with up to 31 distinct days, days d and d + numPartitions
     * share a reducer — the split is not strictly one-day-per-file.
     */
    static class DatePartitioner extends Partitioner<NullWritable, LogEntity> {
        // Tracks which days have been seen, purely for one-time debug logging.
        static Set<Integer> set = new HashSet<>();

        @Override
        public int getPartition(NullWritable key, LogEntity log, int numPartitions) {
            int day = Integer.parseInt(log.getTime_local().substring(8, 10));
            if (set.add(day)) { // add() returns false when already present
                System.out.println(log.getTime_local() + "\t" + day);
            }
            // Honor the framework-supplied reducer count instead of the
            // hard-coded 17: if setNumReduceTasks() ever changes, a fixed
            // modulus could return an out-of-range partition index.
            return day % numPartitions;
        }
    }

    /** Identity reducer: writes every grouped record through unchanged. */
    static class WriteReducer extends Reducer<NullWritable, LogEntity, NullWritable, LogEntity> {
        public NullWritable v = NullWritable.get();

        @Override
        protected void reduce(NullWritable k, Iterable<LogEntity> logs, Context context)
                throws IOException, InterruptedException {
            for (LogEntity log : logs) {
                context.write(k, log);
            }
        }
    }

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf);
        job.setJarByClass(Clear2.class);

        // mapper output kv types
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(LogEntity.class);

        // final output kv types
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(LogEntity.class);

        job.setMapperClass(ConverterMap.class);
        job.setPartitionerClass(DatePartitioner.class);
        job.setReducerClass(WriteReducer.class);

        File input = new File("D:\\logs\\08");
        File output = new File("D:\\logs\\08_out");

        FileInputFormat.setInputPaths(job, new Path(input.getAbsolutePath()));
        FileOutputFormat.setOutputPath(job, new Path(output.getAbsolutePath()));

        job.setNumReduceTasks(17);
        // Propagate job success/failure as the process exit code; previously
        // the JVM always exited 0 even when the job failed.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}

hive命令

-- Raw cleaned-log table. Rows are the \001-delimited lines produced by the
-- MapReduce cleaning job (Clear2 / LogEntity.toString()).
-- NOTE(review): LogEntity.toString() emits 9 fields, but 12 columns are
-- declared here (terminal, channel, mobile_type have no visible source in
-- the MR output) — confirm the actual file layout before loading.
DROP TABLE IF EXISTS log;
create table if not exists log(
ip string,
time string,
exetime float,
request string,
status int,
body_bytes int,
referer string,
user_agent string,
terminal int,
channel string,
mobile_type string,
body string
)
row format delimited fields terminated by '\001' 
stored as textfile;

-- Load the MR job's output directory from the local filesystem,
-- replacing any previous contents of the table.
load data local inpath '/home/hadoop/idongri_nginx_logs/08/08_out' 
overwrite into table log;


DROP TABLE IF EXISTS log_time;

-- Time-expanded copy of `log`: adds date (dt), time-of-day (hms) and
-- numeric month/hour/day columns for grouping, partitioned by month.
create table if not exists log_time(
ip string,
time string,
dt string,
hms string,
m int,
h int,
d int,
exetime float,
request string,
status int,
body_bytes int,
referer string,
user_agent string,
terminal int,
channel string,
mobile_type string,
body string
)
partitioned by(month string)
row format delimited fields terminated by '\001' 
stored as textfile;

-- `time` has the form 'yyyy-MM-dd HH:mm:ss'; Hive substring() is 1-based.
-- Hive insert-select maps columns POSITIONALLY (aliases are ignored), so the
-- select list must follow the table's column order: ... m, h, d, ...
-- The original script emitted month, day, then substring(time,11,3) — which
-- starts at the space (' HH') — so `h` received the day and `d` a malformed
-- hour. Fixed below.
insert into log_time partition(month='2017-08')
select c.ip,c.time,
substring(c.time,1,10) as dt,   -- date part 'yyyy-MM-dd'
substring(c.time,12) as hms,    -- time-of-day part 'HH:mm:ss'
substring(c.time,6,2) as m,     -- month
substring(c.time,12,2) as h,    -- hour
substring(c.time,9,2) as d,     -- day of month
c.exetime,
c.request,c.status,c.body_bytes,
c.referer,c.user_agent,
c.terminal,c.channel,c.mobile_type,c.body
from log c;

-- NOTE(review): `time_log` is never created anywhere in this script, so this
-- rename would fail; it looks like a leftover from an earlier version where
-- the table was created under that name. Commented out — remove once confirmed.
-- alter table time_log rename to log_time;

-- Page-view counts per (date, hour, terminal, channel), derived from
-- log_time and partitioned by month.
drop table if exists pv;
create table if not exists pv(
  dt string,
  h int,
  terminal int,
  channel string,
  cnt int
)
partitioned by(month string)
row format delimited fields terminated by '\001' 
stored as textfile;

-- One output row per distinct (dt, h, terminal, channel) combination;
-- group-by order differs from the select order, which is fine in Hive.
insert into pv partition(month='2017-08')
select dt,h,terminal,channel,count(*) from log_time group by dt,h,channel,terminal;

mysql

-- MySQL target table for the sqoop export below.
-- Column order and names mirror the Hive `log` table, since sqoop maps
-- exported fields to columns by position/name.
drop table log;
create table if not exists log(
ip varchar(20),
time varchar(20),
exetime decimal(9,2),
request text,
status int,
body_bytes int,
referer varchar(200),
user_agent varchar(300),
terminal int,
channel varchar(20),
mobile_type varchar(100),
body text
);

hive导入mysql

# Export the Hive-warehouse `log` data files into the MySQL `log` table.
# --fields-terminated-by '\001' must match the Hive table's field delimiter.
# NOTE(review): a plaintext --password is visible in `ps` output and shell
# history; prefer -P (interactive prompt) or --password-file.
# NOTE(review): --export-dir points at test.db/log — confirm this matches the
# database where the Hive `log` table was actually created.
/home/hadoop/apps/sqoop-1.4.6/bin/sqoop export \
--connect "jdbc:mysql://192.168.199.100:3306/sakila" \
--username root \
--password root \
--table log \
--fields-terminated-by '\001' \
--export-dir /user/hive/warehouse/test.db/log;
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值