业务:输出相同订单编号中最大金额的记录
订单编号 金额
Order_0000001,Pdt_01,222.8
Order_0000001,Pdt_05,25.8
Order_0000002,Pdt_05,325.8
Order_0000002,Pdt_03,522.8
Order_0000002,Pdt_04,122.4
Order_0000003,Pdt_01,222.8
思路:传入reduce时,相同订单编号只传入一次,且是最大的一个。排序由 OrderBean 的 compareTo 实现(订单号升序、金额降序);分组需继承 WritableComparator 实现按订单号分组的 GroupingComparator;分区时按订单编号,需继承 Partitioner 类并重写 getPartition 方法
OrderBean.java,OrderIdGroupingComparator.java,OrderIdPartitioner.java,SecondarySort.java
OrderBean.java
package cn.jugiven.bigdata.secondarysort;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
/**
 * Composite key for the secondary sort: carries both the order id and the
 * order amount so the shuffle can sort by (orderId asc, money desc).
 * Serialized via Hadoop's Writable contract.
 */
public class OrderBean implements WritableComparable<OrderBean>{
    private String orderId;
    // Primitive double instead of boxed Double: avoids autoboxing and the
    // NullPointerException risk when compareTo runs on an unset instance.
    private double money;

    /** No-arg constructor required by Hadoop's Writable deserialization. */
    public OrderBean(){
    }

    public OrderBean(String orderId,double money){
        this.orderId=orderId;
        this.money = money;
    }

    /** Resets both fields at once so the mapper can reuse a single instance. */
    public void setAll(String orderId,double money){
        this.orderId=orderId;
        this.money = money;
    }

    public String getOrderId() {
        return orderId;
    }

    public void setOrderId(String orderId) {
        this.orderId = orderId;
    }

    public double getMoney() {
        return money;
    }

    public void setMoney(double money) {
        this.money = money;
    }

    /** Deserializes fields in the same order write() emitted them. */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.money=in.readDouble();
        this.orderId=in.readUTF();
    }

    /** Serializes money first, then orderId; must mirror readFields(). */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeDouble(money);
        out.writeUTF(orderId);
    }

    /**
     * Orders keys by orderId ascending, then money DESCENDING, so the
     * largest amount of each order arrives first at the reducer.
     */
    @Override
    public int compareTo(OrderBean o) {
        int cmp = this.orderId.compareTo(o.orderId);
        if(cmp==0){
            // Operands swapped (o before this) to get a descending compare;
            // clearer and safer than negating a comparator result.
            cmp=Double.compare(o.money, this.money);
        }
        return cmp;
    }

    @Override
    public String toString() {
        return orderId +'\t'+money;
    }
}
OrderIdGroupingComparator.java
package cn.jugiven.bigdata.secondarysort;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
 * Grouping comparator: treats two OrderBean keys as the same reduce group
 * whenever their order ids match, ignoring the money field. Combined with
 * the key's sort order this lets the reducer see only the top record.
 */
public class OrderIdGroupingComparator extends WritableComparator{
    public OrderIdGroupingComparator() {
        // createInstances=true so the parent can deserialize OrderBean keys.
        super(OrderBean.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        String leftId = ((OrderBean) a).getOrderId();
        String rightId = ((OrderBean) b).getOrderId();
        return leftId.compareTo(rightId);
    }
}
OrderIdPartitioner.java
package cn.jugiven.bigdata.secondarysort;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;
/**
 * Routes every record of one order to the same reducer by partitioning on
 * the order id alone (not the full key, which also contains the amount).
 */
public class OrderIdPartitioner extends Partitioner<OrderBean, NullWritable>{
    @Override
    public int getPartition(OrderBean orderbean, NullWritable value, int numReducertask) {
        // Mask the sign bit so a negative hashCode never yields a negative partition.
        int nonNegativeHash = orderbean.getOrderId().hashCode() & Integer.MAX_VALUE;
        return nonNegativeHash % numReducertask;
    }
}
SecondarySort.java
package cn.jugiven.bigdata.secondarysort;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver + mapper + reducer for the secondary-sort job: emits, for each
 * order id, the single record with the largest amount.
 */
public class SecondarySort {

    /** Parses "orderId,productId,money" lines into OrderBean keys. */
    static class SecondarySortMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable>{
        // Reused across map() calls to avoid one allocation per input line.
        OrderBean bean = new OrderBean();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] words = line.split(",");
            // Skip header, blank, or malformed lines instead of crashing with
            // ArrayIndexOutOfBoundsException (the sample input has a header row).
            if (words.length < 3) {
                return;
            }
            bean.setAll(words[0], Double.parseDouble(words[2]));
            context.write(bean, NullWritable.get());
        }
    }

    /**
     * Because keys are grouped by order id and sorted money-descending, the
     * key passed into reduce() is already the max record of its group, so a
     * single write per group suffices.
     */
    static class SecondarySortReducer extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable>{
        @Override
        protected void reduce(OrderBean orderBean, Iterable<NullWritable> values,
                Context context)
                throws IOException, InterruptedException {
            context.write(orderBean, NullWritable.get());
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(SecondarySort.class);
        job.setMapperClass(SecondarySortMapper.class);
        job.setReducerClass(SecondarySortReducer.class);
        job.setOutputKeyClass(OrderBean.class);
        job.setOutputValueClass(NullWritable.class);
        // Partition by order id, group by order id: together with OrderBean's
        // sort order this implements the secondary sort.
        job.setPartitionerClass(OrderIdPartitioner.class);
        job.setGroupingComparatorClass(OrderIdGroupingComparator.class);
        FileInputFormat.setInputPaths(job, new Path("C:/wordcount/gpinput"));
        FileOutputFormat.setOutputPath(job, new Path("C:/wordcount/gpoutput"));
        job.setNumReduceTasks(1);
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
创建目录:
C:/wordcount/gpinput
然后本地运行即可,当然也可以打成jar包,放到集群上跑