This time we separate out certain specific access IPs in the log records, writing IPs that start with 101.226.93, 112.17.244, or 218.26.54 to a separate output file.
The sample data is again the 10,000-line, 2.5 MB log file from the earlier post (http://blog.youkuaiyun.com/cafebar123/article/details/73928303), used as the statistics sample.
(1) Counting how many times each access IP appears is not repeated here; see the article linked above.
(2) Writing IPs that start with 101.226.93, 112.17.244, or 218.26.54 to a separate output file. The trick is a custom Partitioner: each reduce task writes its own part-r-xxxxx file, so routing these three prefixes to reduce task 1 lands them in their own output file:
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Kpi_IP_Provider {

    public static class IntSumMapper extends Mapper<Object, Text, Text, Kpi_IPCountBean> {

        private Kpi_IPCountBean bean = new Kpi_IPCountBean();
        private Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            if (value.toString().indexOf("\\") == -1) {
                // filter out failed requests (filterLog comes from the article linked above)
                String line = StringHandleUtils.filterLog(value.toString());
                String[] fields = line.split(" ");
                String ip = fields[0]; // the access IP is the first field
                word.set(ip);
                Integer count = 1; // each occurrence of an IP counts as 1
                bean.setIpCount(ip, count);
                context.write(word, bean);
            }
        }
    }
    public static class IntSumReducer extends Reducer<Text, Kpi_IPCountBean, Text, Kpi_IPCountBean> {

        private Kpi_IPCountBean bean = new Kpi_IPCountBean();

        @Override
        public void reduce(Text key, Iterable<Kpi_IPCountBean> values, Context context)
                throws IOException, InterruptedException {
            // sum the per-occurrence counts for this IP
            int sum = 0;
            for (Kpi_IPCountBean val : values) {
                sum += val.getIpcount();
            }
            bean.setIpCount("", sum);
            context.write(key, bean);
        }
    }
    public static class ServiceProviderPartitioner extends Partitioner<Text, Kpi_IPCountBean> {

        // IP prefixes (first three octets) that should be routed to reduce task 1
        private static Map<String, Integer> providerMap = new HashMap<String, Integer>();
        static {
            providerMap.put("101.226.93", 1);
            providerMap.put("112.17.244", 1);
            providerMap.put("218.26.54", 1);
        }

        @Override
        public int getPartition(Text key, Kpi_IPCountBean value, int number) {
            String ip = key.toString();
            // look up by the first three octets, so prefixes of different
            // lengths (218.26.54 vs 112.17.244) both match
            String ipField = ip.substring(0, ip.lastIndexOf('.'));
            Integer p = providerMap.get(ipField);
            if (p == null) {
                p = 0; // all other IPs go to reduce task 0
            }
            return p;
        }
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "ip count provider");
        job.setJarByClass(Kpi_IP_Provider.class);
        job.setMapperClass(IntSumMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Kpi_IPCountBean.class);
        // replace the default HashPartitioner with the provider-aware one
        job.setPartitionerClass(ServiceProviderPartitioner.class);
        // the number of reduce tasks must cover every partition index returned above
        job.setNumReduceTasks(2);
        FileInputFormat.addInputPath(job, new Path("hdfs://119.29.174.43:9000/user/hadoop/miqiLog10000Input"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://119.29.174.43:9000/user/hadoop/miqiLogOutProvider"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
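The job also depends on two helpers from the earlier article: StringHandleUtils.filterLog and the custom Writable Kpi_IPCountBean. For readers without that code, the sketch below shows a minimal bean that would satisfy the calls made above; the field layout and the serialized format are assumptions inferred from those calls, not the original class:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;

// Hypothetical minimal stand-in for the bean defined in the earlier article.
public class Kpi_IPCountBean implements Writable {

    private String ip = "";
    private int ipCount = 0;

    public void setIpCount(String ip, int ipCount) {
        this.ip = ip;
        this.ipCount = ipCount;
    }

    public int getIpcount() {
        return ipCount;
    }

    @Override
    public void write(DataOutput out) throws IOException { // serialize in field order
        out.writeUTF(ip);
        out.writeInt(ipCount);
    }

    @Override
    public void readFields(DataInput in) throws IOException { // deserialize in the same order
        ip = in.readUTF();
        ipCount = in.readInt();
    }

    @Override
    public String toString() {
        // TextOutputFormat writes key + tab + toString(), so emit just the count
        return String.valueOf(ipCount);
    }
}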
Result:
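With job.setNumReduceTasks(2), the output directory miqiLogOutProvider should contain two part files: part-r-00001 with the counts for IPs whose first three octets are 101.226.93, 112.17.244, or 218.26.54, and part-r-00000 with the counts for all other IPs.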