MapReduce Single-Table Join (Self-Join)

Data: a child-parent table (the first line is the header "child parent"; columns are whitespace-separated).
Goal: for every child, find all of their grandparents, on both the father's and the mother's side.

child parent
Tom Lucy
Tom Jack
Jone Lucy
Jone Jack
Lucy Marry
Lucy Jesse
Jack Alice
Jack Jesse
Terry Alice
Terry Jesse
Philip Terry
Philip Alma
Mark Terry
Mark Alma

Result:

Jone    Alice
Tom    Alice
Jone    Jesse
Tom    Jesse
Jone    Marry
Tom    Marry
Jone    Jesse
Tom    Jesse
Mark    Alice
Philip    Alice
Mark    Jesse
Philip    Jesse

Mapper:

The job is a self-join of the table with itself. The mapper emits every record twice: once keyed by the child with tag "1:" (the value is that key's parent), and once keyed by the parent with tag "2:" (the value is that key's child). After the shuffle, the reducer sees, for each person, both that person's parents and that person's children, and the cross product of those two lists gives the grandchild-grandparent pairs.

A pitfall the author hit: allocate a new Text for every context.write() call; do not reuse a single Text via Text.set().

package _SingleTable;


import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * @Author: Dapeng
 * @Description:
 * @Date: Created at 10:11 AM, 2018/11/8
 */
public class SingleTableMap extends Mapper<LongWritable,Text,Text,Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] wordArr = line.split("\\s+");   // columns are whitespace-separated: child parent
        if (!"child".equals(wordArr[0])) {       // skip the header line
            // keyed by child, tag "1:": the value is this key's parent
            context.write(new Text(wordArr[0]), new Text("1:" + wordArr[1]));
            // keyed by parent, tag "2:": the value is this key's child
            context.write(new Text(wordArr[1]), new Text("2:" + wordArr[0]));
        }
    }
}
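
To make the tagging concrete, this is what the mapper emits for the first data line (a trace of the two write() calls, not extra job code):

input line: Tom Lucy
emits:      (Tom,  "1:Lucy")   Lucy is Tom's parent
            (Lucy, "2:Tom")    Tom is Lucy's child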

Reducer:

package _SingleTable;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * @Author: Dapeng
 * @Description:
 * @Date: Created at 10:11 AM, 2018/11/8
 */
public class SingleTableReduce extends Reducer<Text,Text,Text,Text> {
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // collect, for the key person, their parents and their children
        List<String> parents = new ArrayList<String>();
        List<String> children = new ArrayList<String>();
        Text t1 = new Text();
        Text t2 = new Text();

        for (Text t : values) {
            String[] s = t.toString().split(":");
            if ("1".equals(s[0])) {
                parents.add(s[1]);      // tag "1:": a parent of the key person
            } else if ("2".equals(s[0])) {
                children.add(s[1]);     // tag "2:": a child of the key person
            }
        }

        // cross product: each child gets each parent as a grandparent
        for (String p : parents) {
            for (String c : children) {
                t1.set(p);
                t2.set(c);
                context.write(t2, t1);  // (grandchild, grandparent)
            }
        }
    }
}
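
For the key Lucy, for example, the reducer receives the tagged values {1:Marry, 1:Jesse, 2:Tom, 2:Jone}: parents = [Marry, Jesse], children = [Tom, Jone], and the cross product emits (Tom, Marry), (Jone, Marry), (Tom, Jesse), (Jone, Jesse). The same logic at key Jack emits the Jesse pairs a second time, which is why the Jone-Jesse and Tom-Jesse pairs appear twice in the result above. A minimal local test of this behavior, a sketch assuming MRUnit (org.apache.mrunit:mrunit, hadoop2 classifier) and JUnit are on the classpath:

package _SingleTable;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Test;

import java.util.Arrays;

public class SingleTableReduceTest {
    @Test
    public void grandparentsForKeyLucy() throws Exception {
        ReduceDriver<Text, Text, Text, Text> driver =
                ReduceDriver.newReduceDriver(new SingleTableReduce());
        // what the shuffle hands the reducer for key "Lucy"
        driver.withInput(new Text("Lucy"), Arrays.asList(
                new Text("1:Marry"), new Text("1:Jesse"),
                new Text("2:Tom"), new Text("2:Jone")));
        // expected order: outer loop over parents, inner loop over children
        driver.withOutput(new Text("Tom"), new Text("Marry"));
        driver.withOutput(new Text("Jone"), new Text("Marry"));
        driver.withOutput(new Text("Tom"), new Text("Jesse"));
        driver.withOutput(new Text("Jone"), new Text("Jesse"));
        driver.runTest();
    }
}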
Driver:

package _SingleTable;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * @Author: Dapeng
 * @Description:
 * @Date: Created at 10:11 AM, 2018/11/8
 */
public class SingleTableMain {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        //0. create the job
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "single_table");
        job.setJarByClass(SingleTableMain.class);
        //1. input file (read with TextInputFormat by default)
        FileInputFormat.addInputPath(job, new Path("file:/D:/hadoopFile/singleTable/data.txt"));
        //2. mapper
        job.setMapperClass(SingleTableMap.class);
        // map output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        //3. shuffle (default partitioning and grouping by key)

        //4. reducer
        job.setReducerClass(SingleTableReduce.class);
        // final output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        //5. output
        FileOutputFormat.setOutputPath(job, new Path("file:/D:/hadoopFile/singleTable/out"));

        //6. run and wait for completion
        boolean result = job.waitForCompletion(true);
        System.out.println(result);
    }
}
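
Two notes on running this. The file:/ paths mean the job reads and writes the local filesystem (it runs in local mode), and Hadoop fails the job if the output directory already exists. A common guard is to delete a stale output directory in the driver before submitting; a sketch reusing the conf and path from the driver above (needs import org.apache.hadoop.fs.FileSystem):

        // place before FileOutputFormat.setOutputPath(job, out)
        Path out = new Path("file:/D:/hadoopFile/singleTable/out");
        FileSystem fs = out.getFileSystem(conf);
        if (fs.exists(out)) {
            fs.delete(out, true);   // true = delete recursively
        }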

 

Reposted from: https://www.cnblogs.com/da-peng/p/9930765.html
