Hadoop MapReduce (Sorting, Part 2)

This post walks through a movie popularity ranking implemented with Hadoop MapReduce: it counts the number of user reviews each movie received and picks the three most-reviewed movies.


Today we start from the raw data and compute the three hottest movies.

I treat the number of user reviews each movie received as its measure of popularity.
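The input is assumed to be one JSON object per line, with fields matching the CountTen bean defined below; the records here are made up for illustration, not taken from the actual dataset:

{"movie":"1193","rate":5,"timeStamp":"978300760","uid":"1"}
{"movie":"661","rate":3,"timeStamp":"978302109","uid":"1"}
{"movie":"1193","rate":4,"timeStamp":"978301968","uid":"2"}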

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

// Bean that mirrors one JSON record of the input data; it also implements
// WritableComparable so it could serve as a Hadoop key if needed.
public class CountTen implements WritableComparable<CountTen> {
    private String movie;
    private String timeStamp;
    private Integer rate;
    private String uid;

    public String getMovie() {
        return movie;
    }

    public void setMovie(String movie) {
        this.movie = movie;
    }

    public String getTimeStamp() {
        return timeStamp;
    }

    public void setTimeStamp(String timeStamp) {
        this.timeStamp = timeStamp;
    }

    public Integer getRate() {
        return rate;
    }

    public void setRate(Integer rate) {
        this.rate = rate;
    }

    public String getUid() {
        return uid;
    }

    public void setUid(String uid) {
        this.uid = uid;
    }


    // Compare by movie id; the reversed operands give descending order.
    @Override
    public int compareTo(CountTen o) {
        return o.getMovie().compareTo(this.movie);
    }

    // Serialize every field; the write order must match the read order below.
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(this.movie);
        dataOutput.writeUTF(this.timeStamp);
        dataOutput.writeInt(this.rate);
        dataOutput.writeUTF(this.uid);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.movie = dataInput.readUTF();
        this.timeStamp = dataInput.readUTF();
        this.rate = dataInput.readInt();
        this.uid = dataInput.readUTF();
    }
}
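As a quick sanity check outside Hadoop, the bean can be parsed with the same legacy org.codehaus.jackson ObjectMapper the mappers use; a minimal sketch (the sample line is made up):

import org.codehaus.jackson.map.ObjectMapper;

public class CountTenParseDemo {
    public static void main(String[] args) throws Exception {
        ObjectMapper objectMapper = new ObjectMapper();
        // A made-up record in the assumed input format.
        String line = "{\"movie\":\"1193\",\"rate\":5,\"timeStamp\":\"978300760\",\"uid\":\"1\"}";
        // readValue matches JSON property names to the bean's setters.
        CountTen countTen = objectMapper.readValue(line, CountTen.class);
        System.out.println(countTen.getMovie() + " / " + countTen.getRate()); // 1193 / 5
    }
}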

Step 1: from the raw data, compute the total number of reviews for each movie.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.codehaus.jackson.map.ObjectMapper;

import java.io.IOException;

public class Woedount {
    // Map: parse each JSON line into a CountTen and emit (movie, 1).
    public static class CountMap extends Mapper<LongWritable, Text, Text, IntWritable> {
        ObjectMapper objectMapper = new ObjectMapper();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String word = value.toString();
            CountTen countTen = objectMapper.readValue(word, CountTen.class);
            context.write(new Text(countTen.getMovie()), new IntWritable(1));
        }
    }

    // Reduce: sum the 1s for each movie to get its total review count.
    public static class CountRaduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable s : values) {
                count++;
            }
            context.write(key, new IntWritable(count));
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        conf.set("yarn.resourcemanager.hostname", "192.168.72.110");
        conf.set("fs.defaultFS", "hdfs://192.168.72.110:9000/");

        // Pass conf to getInstance, otherwise the settings above are ignored.
        Job job = Job.getInstance(conf);
        job.setJarByClass(Woedount.class);
        job.setMapperClass(CountMap.class);
        job.setReducerClass(CountRaduce.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // waitForCompletion submits the job itself, so an explicit submit() is redundant.
        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}

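Step 1 writes one movie per line as movie<TAB>count, for example (illustrative numbers, not from a real run):

1193	1725
661	525
914	636
3408	468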

Step 2: sort the Step 1 output and take the hottest movies.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;


import java.io.IOException;
import java.util.Comparator;
import java.util.Map;
import java.util.TreeMap;

public class WordSort2 {
    // Map: emit (count, movie) so the shuffle sorts records by count.
    public static class WordSort2Map extends Mapper<LongWritable, Text, IntWritable, Text> {
        // k and v are created once, outside map(), to avoid allocating new objects per record.
        Text v = new Text();
        IntWritable k = new IntWritable();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Step 1 output is "movie<TAB>count".
            String[] split = value.toString().split("\t");
            k.set(Integer.parseInt(split[1]));
            v.set(split[0]);
            context.write(k, v);
        }
    }
    }
    // Reduce input:  (key: count, value: movie)
    // Reduce output: (key: movie, value: count)
    public static class WordSort2Reduce extends Reducer<IntWritable, Text, Text, IntWritable> {
        // A TreeMap keeps entries sorted by key; pollFirstEntry() in cleanup()
        // both returns and removes the first (here: largest) entry.
        TreeMap<IntWritable, Text> map;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            map = new TreeMap<IntWritable, Text>(new Comparator<IntWritable>() {
                @Override
                public int compare(IntWritable o1, IntWritable o2) {
                    // Reverse the natural order so the largest count comes first.
                    return o2.compareTo(o1);
                }
            });
        }

        @Override
        protected void reduce(IntWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Movies with the same count arrive in one call; taking only the first
            // (and keying the TreeMap by count) means ties overwrite one another.
            String movie = values.iterator().next().toString();
            // key is reused by the framework, so store a copy.
            map.put(new IntWritable(key.get()), new Text(movie));
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Emit the top three; stop early if there are fewer than three movies.
            for (int i = 0; i < 3; i++) {
                Map.Entry<IntWritable, Text> entry = map.pollFirstEntry();
                if (entry == null) {
                    break;
                }
                context.write(entry.getValue(), entry.getKey());
            }
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Set configuration before creating the Job; settings made afterwards are ignored.
        conf.set("yarn.resourcemanager.hostname", "192.168.72.110");
        conf.set("fs.defaultFS", "hdfs://192.168.72.110:9000/");
        Job job = Job.getInstance(conf);

        job.setJarByClass(WordSort2.class);

        job.setMapperClass(WordSort2Map.class);
        job.setReducerClass(WordSort2Reduce.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job,new Path(args[0]));
        FileOutputFormat.setOutputPath(job,new Path(args[1]));

//        FileInputFormat.setInputPaths(job,new Path("D:\\eclipse\\wc\\input"));
//        FileOutputFormat.setOutputPath(job,new Path("D:\\eclipse\\wc\\output"));

        // waitForCompletion submits the job, so no separate submit() call is needed.
        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}

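Step 2 then emits the three most-reviewed movies in descending order, again as movie<TAB>count. With the illustrative Step 1 numbers above, the output would be:

1193	1725
914	636
661	525

The job is launched the usual way, e.g. hadoop jar <your-jar> WordSort2 <step1-output-dir> <step2-output-dir> (the jar name and paths are placeholders).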

Finally, both steps can be written in a single class, counting and selecting the top three in one job:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.codehaus.jackson.map.ObjectMapper;

import java.io.IOException;
import java.util.Comparator;
import java.util.Map;
import java.util.TreeMap;

public class UserSortTopN {
    // Map: parse each JSON line and emit (movie, 1), exactly as in Step 1.
    public static class UserSortTopNMap extends Mapper<LongWritable, Text, Text, IntWritable> {
        ObjectMapper objectMapper = new ObjectMapper();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            CountTen countTen = objectMapper.readValue(line, CountTen.class);
            String movie = countTen.getMovie();
            context.write(new Text(movie),new IntWritable(1));
        }
    }
    public static class UserSortTopNReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        // TreeMap sorted with the largest count first; pollFirstEntry() in cleanup()
        // returns and removes the current largest entry.
        TreeMap<IntWritable, Text> map;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            map = new TreeMap<IntWritable, Text>(new Comparator<IntWritable>() {
                @Override
                public int compare(IntWritable o1, IntWritable o2) {
                    // Reverse the natural order so the largest count comes first.
                    return o2.compareTo(o1);
                }
            });
        }


        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Count this movie's reviews. Note: keying the TreeMap by count means
            // movies with identical counts overwrite one another.
            int count = 0;
            for (IntWritable v : values) {
                count++;
            }
            map.put(new IntWritable(count), new Text(key));
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Emit the top three; stop early if there are fewer than three movies.
            for (int i = 0; i < 3; i++) {
                Map.Entry<IntWritable, Text> entry = map.pollFirstEntry();
                if (entry == null) {
                    break;
                }
                context.write(entry.getValue(), entry.getKey());
            }
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Set configuration before creating the Job; settings made afterwards are ignored.
        conf.set("yarn.resourcemanager.hostname", "192.168.72.110");
        conf.set("fs.defaultFS", "hdfs://192.168.72.110:9000/");
        Job job = Job.getInstance(conf);

        job.setJarByClass(UserSortTopN.class);

        job.setMapperClass(UserSortTopNMap.class);
        job.setReducerClass(UserSortTopNReduce.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

//        FileInputFormat.setInputPaths(job,new Path("D:\\eclipse\\wc\\input"));
//        FileOutputFormat.setOutputPath(job,new Path("D:\\eclipse\\wc\\output"));

        // waitForCompletion submits the job, so no separate submit() call is needed.
        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}
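One caveat for both the Step 2 job and this combined one: the TreeMap lives inside a single reducer instance, so the top three are only globally correct when the job runs with exactly one reducer. One reducer is the default, but it can be pinned explicitly in main(); a one-line sketch to add before submitting:

        // Ensure cleanup() sees every movie so the global top 3 is correct.
        job.setNumReduceTasks(1);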