Custom Partitioner

A custom partitioner decides which reduce task each key/value pair emitted by the mappers is sent to. The word-count example below sends words starting with a~m to partition 0 and all other words to partition 1, so the final counts land in two separate output files.

//1 CustomerPartitioner.java
package com.example.mapreduce;
// Custom partitioner

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

//1. Extend the Partitioner class
//2. Override the getPartition method
public class CustomerPartitioner extends Partitioner<Text, LongWritable> {
    @Override
    public int getPartition(Text key, LongWritable value, int numPartitions) {
        // The return value is the partition number: 0, 1, 2, ...
        // Keys with the same partition number are sent to the same reduce
        // task and are therefore written to the same output file.
        // Words starting with a~m go to the first partition (0);
        // all other words go to the second partition (1).
        //1. Get the first character of the word
        char firstChar = key.toString().charAt(0);
        //2. Check the first character (as written, uppercase letters fall into partition 1)
        if (firstChar >= 'a' && firstChar <= 'm') {
            return 0;
        } else {
            return 1;
        }
    }
}
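
Since the partitioner holds no job state, getPartition can be sanity-checked directly in a plain main method or unit test (the Hadoop client jars must be on the classpath). A minimal sketch; PartitionerSmokeTest is just an illustrative name, not part of the job:

package com.example.mapreduce;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

public class PartitionerSmokeTest {
    public static void main(String[] args) {
        CustomerPartitioner partitioner = new CustomerPartitioner();
        // "apple" starts with a letter in a~m, so expect partition 0
        System.out.println(partitioner.getPartition(new Text("apple"), new LongWritable(1), 2));
        // "zebra" does not, so expect partition 1
        System.out.println(partitioner.getPartition(new Text("zebra"), new LongWritable(1), 2));
    }
}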

//2 WordCountDriver.java

package com.example.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class WordCountDriver {

    // MapReduce driver:
    // configures and submits the job
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        // Set the user name to root when operating on the cluster
        System.setProperty("HADOOP_USER_NAME", "root");
        // 1. Get the configuration and create the job object
        Configuration conf = new Configuration();

        // Cluster address (uncomment to read from/write to HDFS)
        //conf.set("fs.defaultFS", "hdfs://douyin:8020");

        Job job = Job.getInstance(conf);
        // 2. Associate this driver class with the jar
        job.setJarByClass(WordCountDriver.class);

        // 3. Set the Mapper and Reducer classes
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);

        // 4. Set the Mapper output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // 5. Set the final output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

//        // 6. Set the input and output paths. Make sure the input directory exists
//        //    and contains the text files whose words you want to count. The output
//        //    directory is created automatically; if it already exists, the job fails!
//        FileInputFormat.setInputPaths(job, new Path("E://vm//wordcount"));
//        FileOutputFormat.setOutputPath(job, new Path("E://vm//wordcount1"));

        // 6. Set the input and output paths (local Windows paths here; the
        //    output directory must not already exist, or the job will fail)
        FileInputFormat.setInputPaths(job, new Path("D:\\vm\\wcinput"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\vm\\wcoutput1"));

        // Set the number of reduce tasks; it must cover every partition
        // number the custom partitioner can return (here: 0 and 1, so 2)
        job.setNumReduceTasks(2);
        // Set the custom partitioner
        job.setPartitionerClass(CustomerPartitioner.class);
        // 7. Submit the job and wait for it to finish
        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}
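
With two reduce tasks and the custom partitioner, the output directory should contain two result files, along the lines of:

wcoutput1/
    _SUCCESS
    part-r-00000    (counts for words starting with a~m)
    part-r-00001    (counts for all other words)

Two related caveats: with a single reduce task, Hadoop generally bypasses the custom partitioner and writes everything to one file; and with two or more reduce tasks, a partition number outside 0..numReduceTasks-1 fails the job with an illegal-partition error. That is why setNumReduceTasks(2) matches the two partitions here.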

//3 WordCountMapper.java

package com.example.mapreduce;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

//1. Extend Mapper
//2. Override the map method
//LongWritable, Text: the input key/value types. LongWritable is the key (the byte offset of the line), Text is the value (the line itself)
//Text, LongWritable: the output key/value types of the map function. Text is the key (the word), LongWritable is the value (the count)
public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        //1. Take one line of input and split it into individual words
        String[] words = value.toString().split("\\s+");
        //2. For each word, emit a <word, 1> key/value pair
        for (String word : words) {
            if (word.isEmpty()) {
                continue; // skip empty tokens so the partitioner never sees an empty key
            }
            context.write(new Text(word), new LongWritable(1));
        }
    }
}
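
As a side note, a common MapReduce idiom (optional, purely a performance tweak) is to reuse the output Writable objects instead of allocating new ones for every record; context.write serializes the values immediately, so mutating them on the next iteration is safe. A sketch of the same WordCountMapper with reused instances, shown as a drop-in replacement rather than an additional class:

package com.example.mapreduce;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    // Reused output objects; context.write copies their serialized bytes,
    // so overwriting them on the next iteration is safe
    private final Text outKey = new Text();
    private final LongWritable one = new LongWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        for (String word : value.toString().split("\\s+")) {
            if (word.isEmpty()) {
                continue;
            }
            outKey.set(word);
            context.write(outKey, one);
        }
    }
}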

//4 WordCountReducer.java

package com.example.mapreduce;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

//1. Extend Reducer
//2. Override the reduce method
public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        //1. Aggregate the values for this key: sum them up
        long sum = 0;
        for (LongWritable value : values) {
            sum += value.get();
        }
        //2. Emit <word, total count>
        context.write(key, new LongWritable(sum));
    }

}
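
Because this reduce logic (summing longs) is associative and commutative, the same class can optionally be reused as a combiner to pre-aggregate counts on the map side and cut down the data shuffled to the reducers. One extra line in the driver enables it:

        // Optional: pre-aggregate on the map side using the reducer logic
        job.setCombinerClass(WordCountReducer.class);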
