一、前期准备
可参考 “词频统计” 案例中的前期准备阶段
二、数据准备
生成天气数据,上传至 HDFS
package com.hdtrain;
import javafx.scene.input.DataFormat;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
public class GenerateWeather {

    /**
     * Emits 10,000 sample weather records to stdout, one per line, in the
     * form {@code yyyy-MM-dd HH:mm:ss<TAB>temperature}. Timestamps are drawn
     * uniformly from 2000-01-01 through 2019-12-31; temperatures are uniform
     * integers in [-20, 39]. Redirect stdout to a file to build the demo input.
     */
    public static void main(String[] args) throws ParseException {
        DateFormat fmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        long rangeStart = fmt.parse("2000-01-01 00:00:00").getTime();
        long rangeEnd = fmt.parse("2019-12-31 23:59:59").getTime();
        long span = rangeEnd - rangeStart;

        int emitted = 0;
        while (emitted < 10000) {
            // Pick a random instant inside the 20-year window.
            long offset = (long) (Math.random() * span);
            int temperature = -20 + (int) (Math.random() * 60);
            System.out.println(fmt.format(new Date(rangeStart + offset)) + "\t" + temperature);
            emitted++;
        }
    }
}
三、天气预报案例
1. WeatherJob.java
package com.hdtrain;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class WeatherJob {

    /**
     * Driver for the weather MapReduce job: configures the local runner,
     * wires up {@code WeatherMapper}/{@code WeatherReducer}, and submits the
     * job, exiting with a nonzero status if the job fails.
     *
     * <p>Input is read from {@code /data/weather.txt} on HDFS; output goes to
     * a timestamped directory under {@code /results/} so reruns never collide
     * with an existing output path.
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration configuration = new Configuration(true);
        // Use the local job runner so the example works without a YARN cluster.
        configuration.set("mapreduce.framework.name", "local");

        Job job = Job.getInstance(configuration);
        job.setJobName("Weather--" + System.currentTimeMillis());
        job.setJarByClass(WeatherJob.class);
        job.setNumReduceTasks(2);

        FileInputFormat.setInputPaths(job, new Path("/data/weather.txt"));
        // Timestamped output dir avoids FileAlreadyExistsException on reruns.
        FileOutputFormat.setOutputPath(job, new Path("/results/Weather-" + System.currentTimeMillis()));

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setMapperClass(WeatherMapper.class);
        job.setReducerClass(WeatherReducer.class);

        // Fix: the job's success flag was previously discarded, so the JVM
        // always exited 0 even when the job failed. Propagate it instead.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
②计算每个月份温度最高的3个天气记录
代码同上
③计算每年每月温度最高的3天
package com.hdtrain.Weather;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io