Case requirements:
Input data: a text file (phone_info.txt) in which each line holds a phone number, upstream flow, and downstream flow, separated by tabs.
Task:
Compute the total flow (upstream + downstream) for each line, write records whose phone numbers share the same first three digits to the same file, and sort each file by total flow in descending order.
Output:
Three files, one per partition.
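To make the format concrete, here is a hypothetical input fragment (the actual data set is not shown in the original; only the tab-separated layout is assumed from the mapper code below):

    18612345678    200    500
    15887654321    300    400
    13399998888    100    100
    18600001111    50     50

Lines whose numbers start with 186 should land in the first output file, 158 in the second, and everything else in the third, with each file ordered by total flow from largest to smallest.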
Dependencies (pom.xml):
<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.10.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.10.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>2.10.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
        <version>2.10.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-common</artifactId>
        <version>2.10.0</version>
    </dependency>
</dependencies>
The FlowBean class:
The main work is overriding compareTo to implement the sort.
Note that MapReduce sorts by the map output key by default, so the class being sorted must be used as the key.
FlowBean implements WritableComparable, not WritableComparator; don't mix the two up.
import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class FlowBean implements WritableComparable<FlowBean> {
    long phone;
    long upFlow;
    long downFlow;
    long totalFlow;

    // An empty constructor is required so the framework can instantiate the bean by reflection.
    public FlowBean() {
        super();
    }

    // Serialization: the field order here must match readFields().
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(phone);
        dataOutput.writeLong(upFlow);
        dataOutput.writeLong(downFlow);
        dataOutput.writeLong(totalFlow);
    }

    // Deserialization: read the fields in the same order they were written.
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        phone = dataInput.readLong();
        upFlow = dataInput.readLong();
        downFlow = dataInput.readLong();
        totalFlow = dataInput.readLong();
    }

    @Override
    public String toString() {
        return phone + "\t" + upFlow + "\t" + downFlow + "\t" + totalFlow;
    }

    public long getUpFlow() {
        return upFlow;
    }
    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }
    public long getDownFlow() {
        return downFlow;
    }
    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }
    public long getTotalFlow() {
        return totalFlow;
    }
    public void setTotalFlow(long totalFlow) {
        this.totalFlow = totalFlow;
    }
    public long getPhone() {
        return phone;
    }
    public void setPhone(long phone) {
        this.phone = phone;
    }

    // Descending order by totalFlow: a larger total compares as "smaller" so it sorts first.
    @Override
    public int compareTo(FlowBean o) {
        // return this.getTotalFlow() > o.getTotalFlow() ? -1 : 1;
        if (totalFlow > o.getTotalFlow())
            return -1;
        else if (totalFlow < o.getTotalFlow())
            return 1;
        else
            return 0;
    }
}
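As a quick local sanity check that compareTo really yields a descending order (this is not part of the MapReduce job, and the class name FlowBeanSortCheck is made up here):

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class FlowBeanSortCheck {
    public static void main(String[] args) {
        List<FlowBean> beans = new ArrayList<>();
        long[] totals = {100, 300, 200};
        for (long t : totals) {
            FlowBean b = new FlowBean();
            b.setTotalFlow(t);
            beans.add(b);
        }
        Collections.sort(beans); // uses FlowBean.compareTo
        for (FlowBean b : beans) {
            System.out.println(b.getTotalFlow()); // prints 300, 200, 100
        }
    }
}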
The MyMapper class:
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class MyMapper extends Mapper<LongWritable, Text, FlowBean, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Each input line: phone \t upFlow \t downFlow
        String line = value.toString();
        String[] s = line.split("\\t");
        FlowBean k = new FlowBean();
        k.setPhone(Long.parseLong(s[0]));
        k.setUpFlow(Long.parseLong(s[1]));
        k.setDownFlow(Long.parseLong(s[2]));
        k.setTotalFlow(k.getDownFlow() + k.getUpFlow());
        // The bean is the key, so the framework sorts records by compareTo (total flow, descending).
        context.write(k, NullWritable.get());
    }
}
The MyReducer class:
Within each reduce task the framework feeds keys in the order defined by compareTo, so simply writing each key back out preserves the descending sort.
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class MyReducer extends Reducer<FlowBean, NullWritable, FlowBean, NullWritable> {
    @Override
    protected void reduce(FlowBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        // Keys arrive already sorted, so just emit them as-is.
        context.write(key, NullWritable.get());
    }
}
The MyPartitioner class:
The Partitioner's generic parameters are the map output key and value types.
Note that phone holds the full number, so the first three digits must be extracted as a string before comparing; comparing the whole long against 186 or 158 would never match.
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;

public class MyPartitioner extends Partitioner<FlowBean, NullWritable> {
    @Override
    public int getPartition(FlowBean flowBean, NullWritable nullWritable, int numPartitions) {
        // Take the first three digits of the phone number as a string prefix.
        String prefix = String.valueOf(flowBean.getPhone()).substring(0, 3);
        int partition;
        if ("186".equals(prefix))
            partition = 0;
        else if ("158".equals(prefix))
            partition = 1;
        else
            partition = 2;
        return partition;
    }
}
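A quick local check of the partition logic (again outside the job; PartitionCheck is a hypothetical class name):

import org.apache.hadoop.io.NullWritable;

public class PartitionCheck {
    public static void main(String[] args) {
        FlowBean fb = new FlowBean();
        fb.setPhone(18612345678L); // hypothetical number with prefix 186
        int p = new MyPartitioner().getPartition(fb, NullWritable.get(), 3);
        System.out.println(p); // expected: 0
    }
}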
The MyDriver class:
The partitioning essentials:
job.setPartitionerClass(MyPartitioner.class); registers the custom partitioner
job.setNumReduceTasks(3); sets the number of reduce tasks, which must match the number of partitions getPartition can return
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class MyDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(MyDriver.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        // The map output types match the final output types, so separate
        // setMapOutputKeyClass/setMapOutputValueClass calls are unnecessary here.
        job.setOutputKeyClass(FlowBean.class);
        job.setOutputValueClass(NullWritable.class);
        job.setPartitionerClass(MyPartitioner.class); // register the custom partitioner
        job.setNumReduceTasks(3);                     // one reduce task (and output file) per partition
        FileInputFormat.addInputPath(job, new Path("/home/hadoop/temp/phone_info.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/home/hadoop/temp/phone_info_Partition"));
        // Delete any previous output directory; otherwise the job fails on startup.
        FileSystem.get(conf).delete(new Path("/home/hadoop/temp/phone_info_Partition"), true);
        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}
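Assuming the project is packaged into a jar named flow.jar (the name is hypothetical), the job can be launched with:

hadoop jar flow.jar MyDriver

With three reduce tasks, the output directory should contain part-r-00000 (186 numbers), part-r-00001 (158 numbers), and part-r-00002 (all other prefixes), each sorted by total flow in descending order.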