求共同好友、自定义InputFormat实现小文件合并，以及自定义OutputFormat将数据输出到不同的路径

本文链接：https://blog.youkuaiyun.com/Carina_____/article/details/108626750

本文介绍了如何通过MapReduce实现社交网络中共同好友的查找及文本分析技术，包括统计单词在不同文档中的出现次数。此外还探讨了小文件处理的优化方案。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

1.社交粉丝数据分析：求共同好友

public class Step1Reducer extends Reducer<Text,Text,Text,Text> {
//reduce接收到的数据 B 【A，E】
// B 是我们的好友集合里面装的是多个用户
//将数据最终转换成这样的形式进行输出 A-B-E-F-G-H-K- C

/**
 * First-pass mapper of the common-friends job: inverts each user's friend list.
 *
 * <p>Input line format: {@code A:B,C,D,F,E,O} - a user, a separator, then that user's
 * comma-separated friends. For every friend one pair {@code (friend, user)} is written.
 */
public class Step1Mapper extends Mapper<LongWritable, Text,Text,Text> {
    /**
     * Emits {@code (friend, user)} for every friend listed on the line.
     *
     * @param key     input key supplied by the framework; not used here
     * @param value   one input line in the format described on the class
     * @param context sink for the emitted pairs
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        // The documented separator is ':'. ';' is still accepted because this mapper
        // previously split on it, so data in either form keeps working.
        String[] split = line.split("[:;]");
        if (split.length < 2) {
            // Blank line or a line without a friend list: nothing to emit.
            return;
        }
        String user = split[0].trim();
        if (user.isEmpty()) {
            return;
        }
        String[] friendList = split[1].split(",");
        for (String friend : friendList) {
            String friendName = friend.trim();
            if (friendName.isEmpty()) {
                // Skip empty tokens such as the one produced by "B,,C".
                continue;
            }
            context.write(new Text(friendName), new Text(user));
        }
    }
}

/**
 * First-pass reducer of the common-friends job.
 *
 * <p>Receives a friend as the key together with the users grouped under that key,
 * e.g. {@code B -> [A, E]}. Writes the users joined into one string, each followed by
 * {@code "-"}, as the output key and the friend as the output value,
 * e.g. {@code A-E-  B}.
 */
public class Step1Reducer extends Reducer<Text,Text,Text,Text> {

    /**
     * Joins all values into a single {@code "-"}-terminated list and writes it with the key.
     *
     * @param key     the friend
     * @param values  the users grouped under this friend
     * @param context sink for the single output pair
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        final StringBuilder users = new StringBuilder();
        for (Text user : values) {
            users.append(user.toString());
            users.append("-");
        }
        final Text joinedUsers = new Text(users.toString());
        context.write(joinedUsers, key);
    }
}

/**
 * Second-pass mapper of the common-friends job.
 *
 * <p>Input line: a {@code "-"}-separated user list, a tab, then one friend.
 * For every pair of users in the list the friend is written under the key
 * {@code userX-userY}.
 */
public class Step2Mapper extends Mapper<LongWritable, Text,Text,Text> {
    /**
     * Emits {@code (userX-userY, friend)} for every two-user combination of the list.
     *
     * @param key     input key supplied by the framework; not used here
     * @param value   one line: user list, tab, friend
     * @param context sink for the emitted pairs
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        final String[] fields = value.toString().split("\t");
        final String[] users = fields[0].split("-");
        // Sorting first guarantees a pair is always keyed the same way (A-E, never E-A).
        Arrays.sort(users);
        final int count = users.length;
        for (int first = 0; first < count - 1; first++) {
            for (int second = first + 1; second < count; second++) {
                Text pair = new Text(users[first] + "-" + users[second]);
                Text friend = new Text(fields[1]);
                context.write(pair, friend);
            }
        }
    }
}

/**
 * Second-pass reducer of the common-friends job.
 *
 * <p>Receives a user pair such as {@code A-E} as the key and the friends grouped under
 * it as values, and writes the pair with all friends joined into one string, each
 * friend followed by {@code "-"}.
 */
public class Step2Reducer extends Reducer<Text,Text,Text,Text> {

    /**
     * Joins the values of one user pair and writes them under the unchanged key.
     *
     * @param key     the user pair
     * @param values  the friends grouped under this pair
     * @param context sink for the single output pair
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        final StringBuilder commonFriends = new StringBuilder();
        for (Text friend : values) {
            commonFriends.append(friend.toString());
            commonFriends.append("-");
        }
        final Text joinedFriends = new Text(commonFriends.toString());
        context.write(key, joinedFriends);
    }
}

2.统计每个单词在每篇文章中分别出现了多少次

/**
 * Mapper of the word-per-document count job.
 *
 * <p>Every whitespace-separated word of the input line is written as
 * {@code (word-fileName, 1)}, where {@code fileName} is the name of the file the
 * current input split belongs to.
 */
public class IndexMapper extends Mapper<LongWritable, Text,Text, IntWritable> {

    /**
     * Emits {@code (word-fileName, 1)} for each word on the line.
     *
     * @param key     input key supplied by the framework; not used here
     * @param value   one line of text
     * @param context provides the input split and receives the emitted pairs
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // The input split tells us which file this line comes from.
        FileSplit fileSplit = (FileSplit) context.getInputSplit();
        String name = fileSplit.getPath().getName();
        // Split on runs of whitespace so tabs and repeated blanks separate words
        // without producing empty tokens in between.
        String[] words = value.toString().split("\\s+");
        for (String word : words) {
            if (word.isEmpty()) {
                // A blank line or leading whitespace still yields one empty token;
                // it is not a word and must not be counted.
                continue;
            }
            context.write(new Text(word + "-" + name), new IntWritable(1));
        }
    }
}

/**
 * Reducer of the word-per-document count job: adds up all counts received for a key
 * and writes the key with that total.
 */
public class IndexReducer extends Reducer<Text, IntWritable,Text,IntWritable> {
    /**
     * Sums the values of one key and writes {@code (key, sum)}.
     *
     * @param key     the key whose counts are being totalled
     * @param values  the individual counts
     * @param context sink for the single output pair
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable count : values) {
            total = total + count.get();
        }
        final IntWritable sum = new IntWritable(total);
        context.write(key, sum);
    }
}