Requirement 3: Partition by phone number. Flow records whose phone number starts with 135, 136, 137, 138 or 139 should each go to their own output file, with all other numbers in a sixth file, while the four flow fields are still summed per phone number.
FlowBean:
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * No sorting is required for this requirement, so implementing the Writable interface is enough.
 */
public class FlowBean implements Writable {
    // upstream flow
    private Integer upFlow;
    // downstream flow
    private Integer downFlow;
    // total upstream flow
    private Integer upCountFlow;
    // total downstream flow
    private Integer downCountFlow;

    @Override
    public String toString() {
        return upFlow + "\t" + downFlow + "\t" + upCountFlow + "\t" + downCountFlow;
    }

    public Integer getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(Integer upFlow) {
        this.upFlow = upFlow;
    }

    public Integer getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(Integer downFlow) {
        this.downFlow = downFlow;
    }

    public Integer getUpCountFlow() {
        return upCountFlow;
    }

    public void setUpCountFlow(Integer upCountFlow) {
        this.upCountFlow = upCountFlow;
    }

    public Integer getDownCountFlow() {
        return downCountFlow;
    }

    public void setDownCountFlow(Integer downCountFlow) {
        this.downCountFlow = downCountFlow;
    }

    /**
     * Serialization: write the fields in a fixed order.
     * @param out
     * @throws IOException
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(upFlow);
        out.writeInt(downFlow);
        out.writeInt(upCountFlow);
        out.writeInt(downCountFlow);
    }

    /**
     * Deserialization: read the fields back in exactly the order they were written.
     * @param in
     * @throws IOException
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.upFlow = in.readInt();
        this.downFlow = in.readInt();
        this.upCountFlow = in.readInt();
        this.downCountFlow = in.readInt();
    }
}
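The only contract write and readFields must honour is that the fields are written and read in the same order. A minimal sketch for checking that off-cluster (a hypothetical test class, not part of the job, assumed to sit in the same package as FlowBean) round-trips a bean through an in-memory byte stream:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class FlowBeanRoundTrip {
    public static void main(String[] args) throws IOException {
        FlowBean original = new FlowBean();
        original.setUpFlow(24);
        original.setDownFlow(27);
        original.setUpCountFlow(2481);
        original.setDownCountFlow(24681);

        // Serialize into an in-memory buffer, the same way Hadoop writes the bean to disk or the network.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));

        // Deserialize into a fresh bean and print it; the fields should come back unchanged.
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
        System.out.println(copy); // expected: 24	27	2481	24681
    }
}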
FlowMapper:
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class FlowMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
    // Sample input record (tab-separated):
    // 1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 游戏娱乐 24 27 2481 24681 200
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] split = value.toString().split("\t");
        String phoneNum = split[1];
        String upFlow = split[6];
        String downFlow = split[7];
        String upCountFlow = split[8];
        String downCountFlow = split[9];

        FlowBean flowBean = new FlowBean();
        flowBean.setUpFlow(Integer.parseInt(upFlow));
        flowBean.setDownFlow(Integer.parseInt(downFlow));
        flowBean.setUpCountFlow(Integer.parseInt(upCountFlow));
        flowBean.setDownCountFlow(Integer.parseInt(downCountFlow));

        // Emit to the next stage: key2 is the phone number, value2 is our custom JavaBean.
        context.write(new Text(phoneNum), flowBean);
    }
}
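The indices above follow the tab-separated layout of the sample record in the comment: field 1 is the phone number and fields 6 to 9 are the four flow columns. A standalone sketch (hypothetical helper class, assuming the record is tab-delimited exactly as in the dataset) makes that mapping explicit:

public class FieldIndexCheck {
    public static void main(String[] args) {
        // The sample record from the Mapper comment, with tabs as separators.
        String line = "1363157985066\t13726230503\t00-FD-07-A4-72-B8:CMCC\t120.196.100.82"
                + "\ti02.c.aliimg.com\t游戏娱乐\t24\t27\t2481\t24681\t200";
        String[] split = line.split("\t");
        System.out.println("phoneNum      = " + split[1]); // 13726230503
        System.out.println("upFlow        = " + split[6]); // 24
        System.out.println("downFlow      = " + split[7]); // 27
        System.out.println("upCountFlow   = " + split[8]); // 2481
        System.out.println("downCountFlow = " + split[9]); // 24681
    }
}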
PhonePartition:
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
public class PhonePartition extends Partitioner<Text, FlowBean> {
    /**
     * Decide the partition from the leading digits of the phone number.
     *
     * @param text     the map output key (the phone number)
     * @param flowBean the map output value
     * @param i        the number of reduce tasks
     * @return the partition index, 0 to 5
     */
    @Override
    public int getPartition(Text text, FlowBean flowBean, int i) {
        String phone = text.toString();
        if (phone.startsWith("135")) {
            return 0;
        } else if (phone.startsWith("136")) {
            return 1;
        } else if (phone.startsWith("137")) {
            return 2;
        } else if (phone.startsWith("138")) {
            return 3;
        } else if (phone.startsWith("139")) {
            return 4;
        } else {
            return 5;
        }
    }
}
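Each index returned by getPartition selects the reduce task that receives the record, so with the 6 reducers configured in FlowMain the prefixes 135 to 139 each get their own output file and every other number lands in the last one. A throwaway sketch (hypothetical class with made-up phone numbers; this rule never touches the value, so null is passed for it) shows the routing:

import org.apache.hadoop.io.Text;

public class PhonePartitionCheck {
    public static void main(String[] args) {
        PhonePartition partitioner = new PhonePartition();
        String[] phones = {"13512345678", "13698765432", "13700000000", "13812341234", "13999999999", "15000000000"};
        for (String phone : phones) {
            // The FlowBean value is unused by this partitioner; 6 matches the reduce task count in FlowMain.
            int partition = partitioner.getPartition(new Text(phone), null, 6);
            System.out.println(phone + " -> partition " + partition);
        }
    }
}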
FlowReducer:
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class FlowReducer extends Reducer<Text, FlowBean, Text, FlowBean> {
    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {
        int upFlow = 0;
        int downFlow = 0;
        int upCountFlow = 0;
        int downCountFlow = 0;
        for (FlowBean value : values) {
            upFlow += value.getUpFlow();
            downFlow += value.getDownFlow();
            upCountFlow += value.getUpCountFlow();
            downCountFlow += value.getDownCountFlow();
        }
        // Build the aggregated bean and write it out, keyed by phone number.
        FlowBean flowBean = new FlowBean();
        flowBean.setUpFlow(upFlow);
        flowBean.setUpCountFlow(upCountFlow);
        flowBean.setDownFlow(downFlow);
        flowBean.setDownCountFlow(downCountFlow);
        context.write(key, flowBean);
    }
}
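For a phone number that appears on more than one input line, all of its FlowBean values arrive in a single reduce call and are summed field by field. For example, with two made-up records of (upFlow, downFlow, upCountFlow, downCountFlow) = (24, 27, 2481, 24681) and (10, 5, 100, 200), the output line for that phone number carries 34, 32, 2581 and 24881, tab-separated by FlowBean.toString().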
FlowMain:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.SnappyCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class FlowMain extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {
        // Assemble the job.
        Job job = Job.getInstance(super.getConf(), "flowCount");
        // When running from a packaged jar, tell Hadoop which class holds the main method.
        job.setJarByClass(FlowMain.class);

        // Step 1: read the input files and parse them into key/value pairs.
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path(args[0]));

        // Step 2: custom map logic.
        job.setMapperClass(FlowMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        // Steps 3 to 6 keep the defaults, apart from the custom partitioner.
        job.setPartitionerClass(PhonePartition.class);

        // Step 7: custom reduce logic.
        job.setReducerClass(FlowReducer.class);
        // Use 6 reduce tasks so that each reducer handles exactly one partition.
        job.setNumReduceTasks(6);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // Step 8: write the output.
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean b = job.waitForCompletion(true);
        return b ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        // Enable compression of the map output.
        configuration.set("mapreduce.map.output.compress", "true");
        configuration.set("mapreduce.map.output.compress.codec", SnappyCodec.class.getName());
        // Enable compression of the final (reduce) output.
        configuration.set("mapreduce.output.fileoutputformat.compress", "true");
        configuration.set("mapreduce.output.fileoutputformat.compress.type", "RECORD");
        configuration.set("mapreduce.output.fileoutputformat.compress.codec", SnappyCodec.class.getName());

        int run = ToolRunner.run(configuration, new FlowMain(), args);
        System.exit(run);
    }
}
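Assuming the five classes above are packaged into a jar (the jar name and HDFS paths here are placeholders, not from the original post), the job can be launched with something like: hadoop jar flowcount.jar FlowMain /input/flow /output/flow_partition. Because six reduce tasks are configured and the partitioner returns indices 0 to 5, the output directory will contain six files, part-r-00000 through part-r-00005, one per phone-prefix group. Note that SnappyCodec only works if the native Snappy libraries are available on the cluster.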
Output: