需求：把每条记录中日期的分隔符由“-”替换为“/”，其余内容保持不变。
源数据举例
{"date":"2020-02-03","city":"beijing"}
处理后的数据
{"date":"2020/02/03","city":"beijing"}
Mapper部分
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper that rewrites hyphen-separated dates in each input line to use slashes,
 * e.g. {@code 2020-02-03} becomes {@code 2020/02/03}.
 *
 * <p>The rewritten line is emitted as the output key with a {@link NullWritable} value.
 * Only date-shaped tokens are touched; any other hyphen in the record (a negative
 * number, a hyphenated name, ...) is left exactly as it was read.
 */
public class WeatherDataMap3 extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * A four-digit year followed by a one- or two-digit month and day, separated by
     * hyphens. The look-arounds keep the match from starting or ending inside a longer
     * run of digits. Compiled once because map() runs for every input record.
     */
    private static final java.util.regex.Pattern HYPHEN_DATE =
            java.util.regex.Pattern.compile("(?<!\\d)(\\d{4})-(\\d{1,2})-(\\d{1,2})(?!\\d)");

    /** Replacement template: the same three captured groups joined with slashes. */
    private static final String SLASH_DATE = "$1/$2/$3";

    // Output key instance, refilled on every map() call instead of allocating a new Text.
    Text k = new Text();

    /**
     * Converts the date separators of one input line and emits the result.
     *
     * @param key     input key supplied by the input format (not used here)
     * @param value   one line of input text
     * @param context task context used to emit the converted line
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        // Rewrite only the separators that belong to a date token.
        line = HYPHEN_DATE.matcher(line).replaceAll(SLASH_DATE);
        k.set(line);
        context.write(k, NullWritable.get());
    }
}
Reducer部分
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Pass-through reducer: writes every key it receives to the output unchanged,
 * paired with a {@link NullWritable} value.
 */
public class WeatherDataReduce3 extends Reducer<Text, NullWritable, Text, NullWritable> {

    /**
     * Emits the given key once.
     *
     * @param key     the grouped key (a full output line)
     * @param values  the values grouped under the key; not read by this method
     * @param context task context used to emit the key
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        // values is never iterated, so exactly one record is written per reduce call
        // no matter how many values were grouped under the key.
        final NullWritable emptyValue = NullWritable.get();
        context.write(key, emptyValue);
    }
}
Driver部分
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver for the date-separator conversion job: configures a MapReduce job that runs
 * {@link WeatherDataMap3} and {@link WeatherDataReduce3} over a fixed local input file
 * and writes the result to a fixed local output directory.
 */
public class WeatherDataDriver3 {

    /**
     * Builds, submits and waits for the job, then exits the JVM with status 0 on
     * success and 1 on failure.
     *
     * @param args command-line arguments (not used)
     * @throws IOException            if the file system or job submission fails
     * @throws ClassNotFoundException if a job class cannot be resolved at submission
     * @throws InterruptedException   if the wait for job completion is interrupted
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        conf.set("hadoop.tmp.dir", "D:\\file\\hdfs_temp");

        // Create the job.
        Job job = Job.getInstance(conf);

        // Tell Hadoop which jar to ship by pointing it at this driver class.
        job.setJarByClass(WeatherDataDriver3.class);

        // Wire up the mapper and reducer that belong to this job.
        job.setMapperClass(WeatherDataMap3.class);
        job.setReducerClass(WeatherDataReduce3.class);

        // Key/value types emitted by the map phase.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

        // Key/value types of the final output.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // Register input/output paths, then recursively delete the output directory.
        // NOTE(review): presumably this clears a previous run's output, since Hadoop
        // rejects a job whose output directory already exists; anything already
        // stored under that path is lost.
        Path p = setPath(job);
        p.getFileSystem(conf).delete(p, true);

        boolean result = job.waitForCompletion(true);
        System.out.println(result);
        System.exit(result ? 0 : 1);
    }

    /**
     * Registers the job's hard-coded input file and output directory.
     *
     * @param job the job to configure
     * @return the output directory path that was registered on the job
     * @throws IOException if the paths cannot be registered on the job
     */
    public static Path setPath(Job job) throws IOException {
        Path inPath = new Path("D:\\file\\source\\weatherdata3.txt");
        Path outPath = new Path("D:\\file\\output");
        FileInputFormat.addInputPath(job, inPath);
        FileOutputFormat.setOutputPath(job, outPath);
        return outPath;
    }
}