package com.qingjiao.traning.mr.weblog.pv;
import com.qingjiao.traning.mr.weblog.UserUtils;
import com.qingjiao.traning.mr.weblog.WebLog;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper for the page-view (PV) count: emits one {@code <url, 1>} pair per accepted log line.
 *
 * <p>Input: key =&gt; line offset, value =&gt; one line of the web log.
 * <br>Output: {@code <url, 1>}.
 */
public class PVMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    // Output key, allocated once and refilled for every emitted record.
    Text k = new Text();

    // Output value: always 1, so each emitted record counts as a single hit for its URL.
    IntWritable v = new IntWritable(1);

    /**
     * Parses one log line and, if it is flagged as valid, writes {@code <requestURL, 1>}.
     *
     * @param key     input key supplied by the framework (the line offset); not used here
     * @param value   one line of the web log
     * @param context job context that receives the output pair
     * @throws IOException          propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {
        // Hand the raw line to the helper that filters and cleans log records.
        final String line = value.toString();
        final WebLog webLog = UserUtils.filterByPVs(line);

        // Records not flagged as valid take no part in the statistics.
        if (!webLog.isFlag()) {
            return;
        }

        // The requested URL becomes the output key.
        k.set(webLog.getRequestURL());
        context.write(k, v);
    }
}