1) Requirement
Given the following order data:
Order id | Product id | Amount |
0000001 | Pdt_01 | 222.8 |
0000001 | Pdt_06 | 25.8 |
0000002 | Pdt_03 | 522.8 |
0000002 | Pdt_04 | 122.4 |
0000002 | Pdt_05 | 722.4 |
0000003 | Pdt_01 | 222.8 |
0000003 | Pdt_02 | 33.8 |
We need to find the most expensive product in each order.
2) Input data: the tab-separated table above. Expected output (the maximum amount per order; order ids are parsed as longs, so leading zeros drop):
1	222.8
2	722.4
3	222.8
3) Analysis
(1) Use the order id and the amount together as the key (the OrderBean defined below). All order records read in the map phase can then be partitioned by order id, sorted by amount in descending order within each order, and sent to reduce.
(2) On the reduce side, a GroupingComparator gathers the key-value pairs with the same order id into one group; the first record of each group is then the maximum, as the sketch below illustrates.
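For the sample input, the reduce side sees the keys sorted and grouped like this:
1	222.8   <- group (orderId=1): first record = maximum
1	25.8
2	722.4   <- group (orderId=2): first record = maximum
2	522.8
2	122.4
3	222.8   <- group (orderId=3): first record = maximum
3	33.8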
4) Implementation
(1) Define the order bean OrderBean
package com.lzz.twoOrder;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class OrderBean implements WritableComparable<OrderBean> {

    private long orderId;      // order id
    private double orderPrice; // transaction amount

    public OrderBean() {
        super();
    }

    public OrderBean(long orderId, double orderPrice) {
        super();
        this.orderId = orderId;
        this.orderPrice = orderPrice;
    }

    public long getOrderId() {
        return orderId;
    }

    public void setOrderId(long orderId) {
        this.orderId = orderId;
    }

    public double getOrderPrice() {
        return orderPrice;
    }

    public void setOrderPrice(double orderPrice) {
        this.orderPrice = orderPrice;
    }

    @Override
    public String toString() {
        return orderId + "\t" + orderPrice;
    }

    // serialization: field order must match readFields()
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(orderId);
        out.writeDouble(orderPrice);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.orderId = in.readLong();
        this.orderPrice = in.readDouble();
    }

    // secondary sort: order id ascending, then price descending, so the
    // most expensive product of each order comes first in its group
    @Override
    public int compareTo(OrderBean orderBean) {
        int res = Long.compare(orderId, orderBean.getOrderId());
        if (res == 0) {
            // Double.compare returns 0 for equal prices, which keeps
            // the compareTo contract; negate it for descending order
            res = -Double.compare(orderPrice, orderBean.getOrderPrice());
        }
        return res;
    }
}
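To see the secondary sort in isolation, the sample records can be sorted with plain Arrays.sort, which uses OrderBean.compareTo (OrderBeanSortDemo is a hypothetical helper class, not part of the job):

package com.lzz.twoOrder;

import java.util.Arrays;

public class OrderBeanSortDemo {
    public static void main(String[] args) {
        OrderBean[] beans = {
            new OrderBean(1, 222.8), new OrderBean(1, 25.8),
            new OrderBean(2, 522.8), new OrderBean(2, 122.4), new OrderBean(2, 722.4),
            new OrderBean(3, 222.8), new OrderBean(3, 33.8)
        };
        Arrays.sort(beans); // id ascending, price descending within an id
        for (OrderBean bean : beans) {
            System.out.println(bean); // 1 222.8 comes before 1 25.8, and so on
        }
    }
}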
(2) Write TwoOrderMapper
package com.lzz.twoOrder;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class TwoOrderMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable> {

    // reused across map() calls; safe because context.write() serializes the key immediately
    OrderBean k = new OrderBean();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // input line: orderId \t productId \t amount
        String line = value.toString();
        String[] words = line.split("\t");
        k.setOrderId(Long.parseLong(words[0]));
        k.setOrderPrice(Double.parseDouble(words[2]));
        // the bean is the whole key; no separate value is needed
        context.write(k, NullWritable.get());
    }
}
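As a quick check of the parsing, the first sample line becomes the composite key (1, 222.8) (MapParseDemo is a hypothetical demo class):

package com.lzz.twoOrder;

public class MapParseDemo {
    public static void main(String[] args) {
        String line = "0000001\tPdt_01\t222.8";
        String[] words = line.split("\t");
        OrderBean k = new OrderBean(Long.parseLong(words[0]), Double.parseDouble(words[2]));
        System.out.println(k); // prints: 1	222.8
    }
}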
(3) Write TwoOrderPartitioner
The partition formula follows the hashCode-based pattern of Hadoop's default HashPartitioner (Ctrl+T in Eclipse shows the Partitioner type hierarchy), but hashes on the order id alone so that all records of one order land in the same partition.
package com.lzz.twoOrder;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;

public class TwoOrderPartitioner extends Partitioner<OrderBean, NullWritable> {

    @Override
    public int getPartition(OrderBean key, NullWritable value, int numPartitions) {
        // partition on the order id alone, so every record of an order goes to
        // the same reduce task; "& Integer.MAX_VALUE" keeps the result non-negative
        return (int) (key.getOrderId() & Integer.MAX_VALUE) % numPartitions;
    }
    // At this point the data is sorted and partitioned. With the 3 reduce
    // tasks set in the driver, orderId % 3 gives:
    //   partition 0: 3  222.8 / 3  33.8
    //   partition 1: 1  222.8 / 1  25.8
    //   partition 2: 2  722.4 / 2  522.8 / 2  122.4
}
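A quick sanity check of that assignment (PartitionDemo is a hypothetical demo class; 3 matches the reduce-task count set in the driver):

package com.lzz.twoOrder;

import org.apache.hadoop.io.NullWritable;

public class PartitionDemo {
    public static void main(String[] args) {
        TwoOrderPartitioner partitioner = new TwoOrderPartitioner();
        for (long id : new long[] {1, 2, 3}) {
            OrderBean bean = new OrderBean(id, 0.0);
            System.out.println("order " + id + " -> partition "
                    + partitioner.getPartition(bean, NullWritable.get(), 3));
        }
        // order 1 -> partition 1, order 2 -> partition 2, order 3 -> partition 0
    }
}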
(4) Write OrderGroupingComparator
package com.lzz.twoOrder;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

public class OrderGroupingComparator extends WritableComparator {

    public OrderGroupingComparator() {
        // register the key class; "true" creates instances for deserialization
        super(OrderBean.class, true);
    }

    // group solely by order id: keys with equal ids compare as 0, so all
    // records of one order are fed to a single reduce() call
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        OrderBean aBean = (OrderBean) a;
        OrderBean bBean = (OrderBean) b;
        return Long.compare(aBean.getOrderId(), bBean.getOrderId());
    }
}
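A small check that the comparator ignores the price, which is exactly what lets all records of one order fall into a single group (GroupingDemo is a hypothetical demo class):

package com.lzz.twoOrder;

public class GroupingDemo {
    public static void main(String[] args) {
        OrderGroupingComparator comparator = new OrderGroupingComparator();
        OrderBean max = new OrderBean(2, 722.4);
        OrderBean other = new OrderBean(2, 122.4);
        // 0 means "same group", even though the prices differ
        System.out.println(comparator.compare(max, other)); // prints 0
    }
}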
(5) Write TwoOrderReducer
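The driver below expects a TwoOrderReducer; a minimal sketch of one follows (the class name comes from the driver; the body is the standard take-the-first-key-per-group pattern):

package com.lzz.twoOrder;

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

public class TwoOrderReducer extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable> {

    @Override
    protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        // the grouping comparator puts all records of one order into this group,
        // sorted by price descending, so the current key is the most expensive item
        context.write(key, NullWritable.get());
    }
}

(6) Write TwoOrderDriver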
package com.lzz.twoOrder;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class TwoOrderDriver {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);

        job.setJarByClass(TwoOrderDriver.class);
        job.setMapperClass(TwoOrderMapper.class);
        job.setReducerClass(TwoOrderReducer.class);

        job.setMapOutputKeyClass(OrderBean.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(OrderBean.class);
        job.setOutputValueClass(NullWritable.class);

        // group keys on the reduce side by order id only
        job.setGroupingComparatorClass(OrderGroupingComparator.class);
        // partition by order id
        job.setPartitionerClass(TwoOrderPartitioner.class);
        // number of reduce tasks
        job.setNumReduceTasks(3);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
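With the sample input and three reduce tasks, each order lands in its own output file; the expected result (following the partition assignment sketched above) would be:

part-r-00000: 3	222.8
part-r-00001: 1	222.8
part-r-00002: 2	722.4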