package com.ibeifeng.hadoop19_copy;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
// The MapReduce programming model is quite formulaic; a job consists of three main parts:
// 1. Write a Map class: extend Mapper and implement the map() method.
// 2. Write a Reduce class: extend Reducer and implement the reduce() method.
// 3. Write a driver method, run(), that configures and submits the job.
/*
 * Sample input data:
 * hadoop hdfs yarn
 * mapreduce linux hdfs
 * sqoop spark hdfs
 */
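/*
 * For reference, a run over the sample input above should produce output along
 * these lines (the default TextOutputFormat writes one tab-separated key/value
 * pair per line, with keys in sorted order):
 *
 *   hadoop     1
 *   hdfs       3
 *   linux      1
 *   mapreduce  1
 *   spark      1
 *   sqoop      1
 *   yarn       1
 */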
public class Wordcount {
    // Mapper: for each input line, emit (word, 1) for every space-delimited token.
    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        // Reusable output objects, created once instead of once per record.
        private final Text mapoutkey = new Text();
        private final IntWritable mapoutvalue = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the line on single spaces and emit each word with a count of 1.
            String line = value.toString();
            String[] values = line.split(" ");
            for (String word : values) {
                mapoutkey.set(word);
                context.write(mapoutkey, mapoutvalue);
            }
        }
    }
    // Reducer: sum the counts for each word and emit (word, total).
    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final IntWritable reduceoutvalue = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Add up all the 1s emitted by the mappers for this word.
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            reduceoutvalue.set(sum);
            context.write(key, reduceoutvalue);
        }
    }
    // Driver: configure the job, wire up the mapper/reducer, and submit it.
    public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Load the default Hadoop configuration from the classpath.
        Configuration conf = new Configuration();
        // Create the job.
        Job job = Job.getInstance(conf, "ZhouXuemei");
        // Set the class used to locate the job jar.
        job.setJarByClass(Wordcount.class);
        // Set the input path.
        Path inpath = new Path(args[0]);
        FileInputFormat.setInputPaths(job, inpath);
        // Set the Mapper class and its output key/value types.
        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Set the Reducer class and the job's output key/value types.
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Set the output path; if it already exists, delete it first,
        // because MapReduce refuses to write to an existing output directory.
        Path outpath = new Path(args[1]);
        FileSystem fs = outpath.getFileSystem(conf);
        if (fs.exists(outpath)) {
            fs.delete(outpath, true);
        }
        FileOutputFormat.setOutputPath(job, outpath);
        return job.waitForCompletion(true) ? 0 : -1;
    }
    public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {
        // Hard-coded HDFS input/output paths for testing; these override any command-line arguments.
        args = new String[]{"hdfs://node-1:8020/user/test.txt", "hdfs://node-1:8020/user/output3"};
        int result = new Wordcount().run(args);
        System.out.println(result);
    }
}
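/*
 * A typical way to launch this job on a cluster (a sketch; the jar name
 * "wordcount.jar" is only an example and depends on how the project is packaged):
 *
 *   hadoop jar wordcount.jar com.ibeifeng.hadoop19_copy.Wordcount
 *
 * Note that main() hard-codes the HDFS input/output paths above, so any
 * command-line arguments passed to the job are ignored.
 */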