数据算法-hadoop3 TopN

TopN 的思路到处都一样:先在每个 map 中取本地的 TopN,再在 reduce 中对各个 map 输出的 TopN 取最终的 TopN。

在 map 中执行 top10cast.put 之后,发现所有的 value 都变成了最新的一条记录。原因是 Hadoop 会复用同一个 Text 对象,直接 put 进去的只是同一个引用(之前从来没有注意过)。所以 put 之前必须先 toString(),再 new 一个 Text 拷贝一份。
reduce 中遍历 values 时也有同样的问题,处理方式相同。

    /**
     * Configures and runs the TopN MapReduce job.
     *
     * <p>Without arguments the job uses the built-in demo paths and N = 10.
     * Optional arguments override them:
     * <ul>
     *   <li>{@code args[0]} and {@code args[1]} - input path and output path
     *       (both must be given; a single argument is ignored)</li>
     *   <li>{@code args[2]} - number of records to keep (N)</li>
     * </ul>
     *
     * @param args optional {@code <input> <output> [N]}
     * @throws Exception if the job cannot be configured or submitted, or if
     *         {@code args[2]} is not a valid integer
     */
    public static void main(String[] args) throws Exception {
        String inputPath = "C:\\demo\\03\\03.txt";
        String outputPath = "C:\\demo\\03\\out";
        int topN = 10;
        if (args != null && args.length >= 2) {
            inputPath = args[0];
            outputPath = args[1];
        }
        if (args != null && args.length >= 3) {
            topN = Integer.parseInt(args[2]);
        }

        // Set before any Hadoop object is created so the property is already
        // in place whenever Hadoop looks up its home directory.
        System.setProperty("hadoop.home.dir", "D:\\hadoop-2.5.2");

        Configuration conf1 = new Configuration();
        // Mapper and reducer read this key in setup() to size their TopN buffers.
        conf1.setInt("N", topN);
        conf1.setBoolean("dfs.permissions", false);

        Job job = Job.getInstance(conf1, "TopN");
        // Identifies the jar containing the job classes when submitted to a cluster.
        job.setJarByClass(TopNMapper.class);

        job.setMapperClass(TopNMapper.class);
        job.setReducerClass(TopNReducer.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);

        // A single reducer is required: it merges every mapper's local TopN
        // into the one global TopN.
        job.setNumReduceTasks(1);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        if (job.waitForCompletion(true)) {
            log.info("MR run successfully");
        } else {
            log.error("MR run failed");
        }
    }
/**
 * Mapper that keeps the N highest-weighted records seen by this map task and
 * emits them in {@link #cleanup(Context)}.
 *
 * <p>Each input line is expected to look like {@code <weight>,<rest>}, where
 * {@code <weight>} parses as a double. N is read from the job configuration
 * key {@code "N"} (default 10).
 *
 * <p>Records are grouped into buckets by weight so that several records with
 * the same weight are all retained instead of overwriting one another.
 */
public class TopNMapper extends Mapper<Object, Text, NullWritable, Text> {
    /** Retained records, bucketed by weight in ascending weight order. */
    private final SortedMap<Double, java.util.List<Text>> top10cast =
            new TreeMap<Double, java.util.List<Text>>();
    /** Total number of records currently held across all buckets. */
    private int retained = 0;
    /** Maximum number of records to keep (the "N" in TopN). */
    private int limit = 10;

    /**
     * Reads N from the job configuration.
     *
     * @param context task context supplying the configuration
     */
    @Override
    protected void setup(
            Mapper<Object, Text, NullWritable, Text>.Context context)
            throws IOException, InterruptedException {
        this.limit = context.getConfiguration().getInt("N", 10);
    }

    /**
     * Adds one input line to the local TopN buffer, evicting the
     * lowest-weighted record once more than N records are held.
     *
     * @param key     input key (unused)
     * @param value   one input line, {@code <weight>,<rest>}
     * @param context task context (unused here; output happens in cleanup)
     * @throws NumberFormatException if a non-blank line does not start with a
     *         parseable double
     */
    @Override
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        if (line.trim().isEmpty()) {
            // Blank lines carry no record; skip them instead of failing the task.
            return;
        }
        double weight = Double.parseDouble(line.split(",", 2)[0]);

        java.util.List<Text> bucket = top10cast.get(weight);
        if (bucket == null) {
            bucket = new java.util.ArrayList<Text>();
            top10cast.put(weight, bucket);
        }
        // Store a copy: the incoming Text instance is reused between calls, so
        // keeping the reference would make every stored value show the last line.
        bucket.add(new Text(line));
        retained++;

        if (retained > limit) {
            // Drop one record from the lowest-weight bucket.
            Double lowest = top10cast.firstKey();
            java.util.List<Text> lowestBucket = top10cast.get(lowest);
            lowestBucket.remove(lowestBucket.size() - 1);
            if (lowestBucket.isEmpty()) {
                top10cast.remove(lowest);
            }
            retained--;
        }
    }

    /**
     * Emits the retained records in ascending weight order, all under the
     * single {@link NullWritable} key.
     *
     * @param context task context used to write the output
     */
    @Override
    protected void cleanup(Context context) throws IOException,
            InterruptedException {
        for (java.util.List<Text> bucket : top10cast.values()) {
            for (Text catAttributes : bucket) {
                context.write(NullWritable.get(), catAttributes);
            }
        }
    }
}
/**
 * Reducer that merges the candidate records emitted by the mappers and writes
 * the N highest-weighted ones.
 *
 * <p>Each value is expected to look like {@code <weight>,<rest>}, where
 * {@code <weight>} parses as a double. N is read from the job configuration
 * key {@code "N"} (default 10).
 *
 * <p>Records are grouped into buckets by weight so that several records with
 * the same weight are all retained instead of overwriting one another.
 */
public class TopNReducer extends Reducer<NullWritable, Text, NullWritable, Text> {

    /** Maximum number of records to output (the "N" in TopN). */
    private int limit = 10;

    /**
     * Reads N from the job configuration.
     *
     * @param context task context supplying the configuration
     */
    @Override
    protected void setup(
            Reducer<NullWritable, Text, NullWritable, Text>.Context context)
            throws IOException, InterruptedException {
        this.limit = context.getConfiguration().getInt("N", 10);
    }

    /**
     * Selects the N highest-weighted records among {@code values} and writes
     * them in ascending weight order.
     *
     * @param key     the shared {@link NullWritable} key
     * @param values  candidate records, {@code <weight>,<rest>}
     * @param context task context used to write the output
     * @throws NumberFormatException if a value does not start with a
     *         parseable double
     */
    @Override
    protected void reduce(NullWritable key, Iterable<Text> values,
            Context context) throws IOException, InterruptedException {
        // Retained records, bucketed by weight in ascending weight order.
        SortedMap<Double, java.util.List<Text>> finaltop10cast =
                new TreeMap<Double, java.util.List<Text>>();
        int retained = 0;

        for (Text catRecord : values) {
            String line = catRecord.toString();
            double weight = Double.parseDouble(line.split(",", 2)[0]);

            java.util.List<Text> bucket = finaltop10cast.get(weight);
            if (bucket == null) {
                bucket = new java.util.ArrayList<Text>();
                finaltop10cast.put(weight, bucket);
            }
            // Store a copy: the iterated Text instance is reused, so keeping
            // the reference would make every stored value show the last record.
            bucket.add(new Text(line));
            retained++;

            if (retained > limit) {
                // Drop one record from the lowest-weight bucket.
                Double lowest = finaltop10cast.firstKey();
                java.util.List<Text> lowestBucket = finaltop10cast.get(lowest);
                lowestBucket.remove(lowestBucket.size() - 1);
                if (lowestBucket.isEmpty()) {
                    finaltop10cast.remove(lowest);
                }
                retained--;
            }
        }

        for (java.util.List<Text> bucket : finaltop10cast.values()) {
            for (Text text : bucket) {
                context.write(NullWritable.get(), text);
            }
        }
    }
}

输入

12,cat1
13,cat2
14,cat3
15,cat4
10,cat5
100,cat100
200,cat200
300,cat300
1,cat001
67,cat67
22,cat22
23,cat23
1000,cat1000
2000,cat2000

输出

14,cat3
15,cat4
22,cat22
23,cat23
67,cat67
100,cat100
200,cat200
300,cat300
1000,cat1000
2000,cat2000
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值