import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class KPI_PV {
/**
* 内部类:映射器 Mapper<KEY_IN, VALUE_IN, KEY_OUT, VALUE_OUT>
*/
public static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
/**
* 重写map方法
*/
public void map(Text k1, Text v1, Context context) throws IOException,
InterruptedException {
KPI kpi = KPI.filterPVs(v1.toString());
System.out.println(kpi.isValid());
if (kpi.isValid()) {
context.write(new Text(kpi.getRemote_addr()), new LongWritable(1));
}
}
}
/**
* 内部类:拆分器 Reducer<KEY_IN, VALUE_IN, KEY_OUT, VALUE_OUT>
*/
public static class MyReducer extends
Reducer<Text, Text, Text, LongWritable> {
protected void reduce(Text k2, Iterable<LongWritable> values, Context context)
throws IOException, InterruptedException {
int sum = 0;
for (LongWritable value : values) {
sum += Integer.parseInt(value.toString());
}
context.write(k2, new LongWritable(sum));
}
}
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub
// 声明配置信息
Configuration conf = new Configuration();
conf.set("fs.default.name", "hdfs://localhost:9000");
// 创建作业
Job job = new Job(conf, "KPI_PV");
job.setJarByClass(KPI_PV.class);
// 设置mr
job.setMapperClass(MyMapper.class);
job.setReducerClass(MyReducer.class);
// 设置输出类型,和Context上下文对象write的参数类型一致
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
// 设置输入输出路径
FileInputFormat.setInputPaths(job, new Path("/test/"));
FileOutputFormat.setOutputPath(job, new Path("/out"));
// 执行
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
输入是6个文件,文件内容分别如下:
1.222.68.172.190 - - [18/Sep/2013:06:49:57 +0000] "GET /images/my.jpg HTTP/1.1" 200 19939 "http://www.angularjs.cn/A00n" "Mozilla/5.0 (Windows NT 6.1)AppleWebKit/537.36(KHTML,like Gecko) Chrome/29.0.1547.66 Safari/537.36"
2.222.68.172.190 - - [18/Sep/2013:06:49:57 +0000] "GET /images/my.jpg HTTP/1.1" 200 19939 "http://www.angularjs.cn/A00n" "Mozilla/5.0 (Windows NT 6.1)AppleWebKit/537.36(KHTML,like Gecko) Chrome/29.0.1547.66 Safari/537.36"
3.223.68.172.190 - - [18/Sep/2013:06:49:57 +0000] "GET /images/my.jpg HTTP/1.1" 200 19939 "http://www.angularjs.cn/A00n" "Mozilla/5.0 (Windows NT 6.1)AppleWebKit/537.36(KHTML,like Gecko) Chrome/29.0.1547.66 Safari/537.36"
4.223.68.172.190 - - [18/Sep/2013:06:49:57 +0000] "GET /images/my.jpg HTTP/1.1" 200 19939 "http://www.angularjs.cn/A00n" "Mozilla/5.0 (Windows NT 6.1)AppleWebKit/537.36(KHTML,like Gecko) Chrome/29.0.1547.66 Safari/537.36"
5.221.68.172.190 - - [18/Sep/2013:06:49:57 +0000] "GET /images/my.jpg HTTP/1.1" 200 19939 "http://www.angularjs.cn/A00n" "Mozilla/5.0 (Windows NT 6.1)AppleWebKit/537.36(KHTML,like Gecko) Chrome/29.0.1547.66 Safari/537.36"
6.222.68.172.190 - - [18/Sep/2013:06:49:57 +0000] "GET /images/my.jpg HTTP/1.1" 200 19939 "http://www.angularjs.cn/A00n" "Mozilla/5.0 (Windows NT 6.1)AppleWebKit/537.36(KHTML,like Gecko) Chrome/29.0.1547.66 Safari/537.36"
1.运行程序后,出现如下问题:
222.68.172.190 1
java.lang.ClassCastException: org.apache.hadoop.io.LongWritable cannot be cast to org.apache.hadoop.io.Text
解决办法:当MapReduce程序读入一个文件时,mapper的key是文件的行的索引,value是文件一行的内容,所以不能把mapper中key的参数设置为Text,改成LongWritable即可
2.改正上述问题之后,运行程序后,out目录下的输出是:
221.68.172.190 1
222.68.172.190 1
222.68.172.190 1
222.68.172.190 1
223.68.172.190 1
223.68.172.190 1
显然程序并没有进行reducer操作。
解决办法: 修改Reducer<Text, Text, Text, LongWritable>中第二个Text为LongWritable即可(否则reduce方法没有重写父类方法,框架会执行默认的恒等Reducer,不做求和)。
正确的运行结果如下:
221.68.172.190 1
222.68.172.190 3
223.68.172.190 2
time:2014/2/24