MapReduce部分练习:使用API进行编程
1、主方法:
package com.bjsxt.TQ;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import com.sun.jersey.core.impl.provider.entity.XMLJAXBElementProvider.Text;
public class MyTQ {
public static void main(String[] args) throws Exception {
//设置配置层
Configuration conf=new Configuration(true);
Job job =Job.getInstance(conf);
job.setJarByClass(MyTQ.class);
job.setJobName("ooxx");
//设置输入格式
// job.setInputFormatClass(LongWritable.class);
//设置输入路径
Path fileiin=new Path("/user/tq");
FileInputFormat.addInputPath(job, fileiin);
//设置map方法
job.setMapperClass(MyMapper.class);
job.setMapOutputKeyClass(TQ.class);
job.setMapOutputValueClass(IntWritable.class);
//设置map端的输出格式
//设置分区器
job.setPartitionerClass(MyPartitioner.class);
//设置排序比较器
job.setSortComparatorClass(MyComparator.class);
//设置map端的预聚合
// job.setCombinerClass(MyCombsiner.class);
//设置reduce端的排序,需要继承WritableComparator方法
job.setGroupingComparatorClass(MyGroupCoomparator.class);
//The method setGroupingComparatorClass(Class<? extends RawComparator>)
//in the type Job is not applicable for the arguments (Class<MyGroupCoomparator>)
//设置reduce端
job.setReducerClass(MyReducer.class);
//设置reduce端的输出

这篇博客详细介绍了如何运用MapReduce API进行编程,通过一个天气案例,讲解了从主方法设定、TQ类的实现,到MyMapper类的设计,以及分区、比较器和reduce端的配置全过程。
最低0.47元/天 解锁文章
578

被折叠的 条评论
为什么被折叠?



