Hadoop 案例6-----TopN问题:求最大的K个值并排序

本文介绍了一个基于Hadoop实现的TopN支付金额案例,包括Mapper和Reducer程序设计,展示了如何通过MapReduce处理大量订单数据,找出支付金额最大的前N笔交易。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

1、需求分析

输入数据每行一条订单记录,字段以逗号分隔,依次为:orderid,userid,payment,productid。要求找出 payment(支付金额)最大的前 N 条记录,并按金额从大到小排序输出;N 通过作业配置项 "N" 指定,默认为 10。示例数据如下:
[root@x00 hd]# cat seventeen_a.txt

1,9819,100,121
2,8918,2000,111
3,2813,1234,22
4,9100,10,1101
5,3210,490,111
6,1298,28,1211
7,1010,281,90
8,1818,9000,20

[root@x00 hd]# cat seventeen_b.txt

100,3333,10,100
101,9321,1000,293
102,3881,701,20
103,6791,910,30
104,8888,11,39

2.Mapper程序:

package cn.edu.bjut.topn;

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Map side of the TopN job: remembers the N largest payments seen by this map
 * task and emits them once, from {@link #cleanup}, instead of emitting every
 * record.
 *
 * <p>Each input line is expected to look like
 * {@code orderid,userid,payment,productid}. N is read from the job
 * configuration key {@code "N"} and defaults to 10.
 */
public class TopNMapper extends Mapper<LongWritable, Text, IntWritable, IntWritable> {

    /** Number of payments to keep (the N of TopN). */
    int len;

    /**
     * Working buffer of size {@code len + 1}. Slot 0 is scratch space for the
     * incoming value; after every sort, slots 1..len hold the largest values
     * seen so far in ascending order.
     */
    int[] top;

    /** Number of real payments currently held in {@code top}; never exceeds {@code len}. */
    private int filled;

    /**
     * Reads N from the configuration and prepares an empty buffer.
     *
     * @throws IllegalArgumentException if the configured N is negative
     */
    @Override
    protected void setup(Context context)
            throws IOException, InterruptedException {
        len = context.getConfiguration().getInt("N", 10);
        if (len < 0) {
            throw new IllegalArgumentException("N must be non-negative, got " + len);
        }
        top = new int[len + 1];
        // Unused slots sort below every real payment, so they can never be
        // mistaken for data (a zero-filled buffer would look like payments of 0).
        Arrays.fill(top, Integer.MIN_VALUE);
        filled = 0;
    }

    /**
     * Parses one order line and offers its payment to the buffer. Lines that do
     * not have exactly four fields, or whose payment is not an integer, are
     * skipped.
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] arr = value.toString().trim().split(",");
        if (arr.length != 4) {
            return;
        }
        int payment;
        try {
            payment = Integer.parseInt(arr[2].trim());
        } catch (NumberFormatException ignored) {
            // Non-numeric payment (for example a header line): skip the record
            // rather than failing the whole task.
            return;
        }
        add(payment);
    }

    /**
     * Offers a payment to the buffer, keeping only the {@code len} largest.
     *
     * @param payment the payment amount of one record
     */
    private void add(int payment) {
        if (filled < len) {
            filled++;
        } else if (payment <= top[0]) {
            // Buffer is full and top[0] is the smallest entry after the last
            // sort, so this value cannot enter the top N.
            return;
        }
        top[0] = payment;
        Arrays.sort(top);
    }

    /**
     * Emits the retained payments in ascending order, using the payment as both
     * key and value. Only slots that hold real data are written.
     */
    @Override
    protected void cleanup(Context context)
            throws IOException, InterruptedException {
        for (int i = len - filled + 1; i <= len; i++) {
            context.write(new IntWritable(top[i]), new IntWritable(top[i]));
        }
    }

}

3.Reducer程序:

package cn.edu.bjut.topn;

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reduce side of the TopN job: merges the candidate payments produced by the
 * mappers and writes the N largest as {@code rank<TAB>payment}, rank 1 being
 * the largest.
 *
 * <p>N is read from the job configuration key {@code "N"} and defaults to 10.
 * Each reducer instance ranks only the values it receives, so a single global
 * ranking requires the job to run with one reduce task.
 */
public class TopNReducer extends Reducer<IntWritable, IntWritable, Text, Text> {

    /** Number of payments to keep (the N of TopN). */
    int len;

    /**
     * Working buffer of size {@code len + 1}. Slot 0 is scratch space for the
     * incoming value; after every sort, slots 1..len hold the largest values
     * seen so far in ascending order.
     */
    int[] top;

    /** Number of real payments currently held in {@code top}; never exceeds {@code len}. */
    private int filled;

    /**
     * Reads N from the configuration and prepares an empty buffer.
     *
     * @throws IllegalArgumentException if the configured N is negative
     */
    @Override
    protected void setup(Context context)
            throws IOException, InterruptedException {
        len = context.getConfiguration().getInt("N", 10);
        if (len < 0) {
            throw new IllegalArgumentException("N must be non-negative, got " + len);
        }
        top = new int[len + 1];
        // Unused slots sort below every real payment, so they can never be
        // mistaken for data (a zero-filled buffer would look like payments of 0).
        Arrays.fill(top, Integer.MIN_VALUE);
        filled = 0;
    }

    /** Offers every value of the group to the buffer; nothing is written here. */
    @Override
    protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        for (IntWritable value : values) {
            add(value.get());
        }
    }

    /**
     * Offers a payment to the buffer, keeping only the {@code len} largest.
     *
     * @param i the payment amount to consider
     */
    private void add(int i) {
        if (filled < len) {
            filled++;
        } else if (i <= top[0]) {
            // Buffer is full and top[0] is the smallest entry after the last
            // sort, so this value cannot enter the top N.
            return;
        }
        top[0] = i;
        Arrays.sort(top);
    }

    /**
     * Writes the retained payments from largest to smallest with a 1-based rank
     * as the key. Only slots that hold real data are written, so fewer than N
     * rows appear when fewer than N payments were received.
     */
    @Override
    protected void cleanup(Context context)
            throws IOException, InterruptedException {
        for (int i = len; i > len - filled; i--) {
            context.write(new Text(String.valueOf(len - i + 1)), new Text(String.valueOf(top[i])));
        }
    }

}

4.主程序:

package cn.edu.bjut.topn;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver for the TopN job.
 *
 * <p>Usage: {@code MainJob <input path> <output path> [N]}. The optional third
 * argument sets the configuration key {@code "N"} read by the mapper and the
 * reducer; when omitted, whatever the configuration already provides (or the
 * default of 10 applied by the tasks) is used.
 */
public class MainJob {
    /**
     * Configures and runs the job, then exits with 0 on success and 1 on
     * failure (2 when the arguments are missing).
     *
     * @param args input path, output path, and optionally N
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: MainJob <input path> <output path> [N]");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        if (args.length > 2) {
            // Must be set before the Job is created: the Job works on its own
            // copy of the configuration.
            conf.setInt("N", Integer.parseInt(args[2]));
        }

        Job job = Job.getInstance(conf, "topn");
        job.setJarByClass(MainJob.class);

        job.setMapperClass(TopNMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setReducerClass(TopNReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // The reducer ranks only the values it receives, so a global TopN
        // needs exactly one reduce task regardless of the cluster default.
        job.setNumReduceTasks(1);

        FileInputFormat.addInputPath(job, new Path(args[0]));

        Path outPath = new Path(args[1]);
        // Resolve the filesystem from the path itself so an output URI on a
        // non-default filesystem is handled correctly.
        FileSystem fs = outPath.getFileSystem(conf);
        if (fs.exists(outPath)) {
            // An existing output directory makes the job fail, so remove it.
            fs.delete(outPath, true);
        }
        FileOutputFormat.setOutputPath(job, outPath);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值