// File: TopNReducer.java
package com.temptopn.api;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.*;
public class TopNReducer extends Reducer<TempKey, IntWritable, Text, Text> {
private Text outputKey = new Text();
private Text outputValue = new Text();
@Override
protected void reduce(TempKey key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
// Collect each day's maximum temperature. The grouping comparator groups
// records by (year, month), and Hadoop reuses the key instance, so
// key.getDay() is refreshed as the values iterator advances.
Map<Integer, Integer> dailyTemps = new HashMap<>();
for (IntWritable val : values) {
dailyTemps.merge(key.getDay(), val.get(), Math::max);
}
// Emit the two days with the highest temperatures (sorted by value,
// not by day number)
List<Map.Entry<Integer, Integer>> byTemp = new ArrayList<>(dailyTemps.entrySet());
byTemp.sort(Map.Entry.<Integer, Integer>comparingByValue(Comparator.reverseOrder()));
for (int i = 0; i < Math.min(2, byTemp.size()); i++) {
Map.Entry<Integer, Integer> entry = byTemp.get(i);
String date = String.format("%04d-%02d-%02d",
key.getYear(), key.getMonth(), entry.getKey());
outputKey.set(date);
outputValue.set(String.format("Station:%s Temp:%d",
key.getStationId(), entry.getValue()));
context.write(outputKey, outputValue);
}
}
}
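
The top-2 selection is the heart of the reducer, so here is a minimal standalone sketch of the same top-2-by-value logic on hypothetical numbers (the class name and data are illustrative only):

import java.util.*;

public class Top2Check {
    public static void main(String[] args) {
        // Hypothetical day -> max-temperature pairs for one month
        Map<Integer, Integer> dailyTemps = new HashMap<>();
        dailyTemps.put(30, 170);
        dailyTemps.put(31, 180);
        dailyTemps.put(15, 250);
        // Sort entries by temperature descending and keep the first two
        List<Map.Entry<Integer, Integer>> byTemp = new ArrayList<>(dailyTemps.entrySet());
        byTemp.sort(Map.Entry.<Integer, Integer>comparingByValue(Comparator.reverseOrder()));
        for (int i = 0; i < Math.min(2, byTemp.size()); i++) {
            System.out.println("day=" + byTemp.get(i).getKey()
                    + " temp=" + byTemp.get(i).getValue());
        }
        // Prints day=15 temp=250, then day=31 temp=180
    }
}
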
// File: TopNMapper.java
package com.temptopn.api;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class TopNMapper extends Mapper<LongWritable, Text, TempKey, IntWritable> {
private TempKey tempKey = new TempKey();
private IntWritable temp = new IntWritable();
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String[] parts = value.toString().split("\\s+");
// Require at least 11 whitespace-separated fields and a valid
// temperature at index 8 (-9999 marks a missing reading)
if (parts.length >= 11 && !parts[8].equals("-9999")) {
try {
// Build the composite key. The station ID is hardcoded here;
// it could also be read from the job configuration.
tempKey.setStationId("599970");
tempKey.setYear(Integer.parseInt(parts[0]));
tempKey.setMonth(Integer.parseInt(parts[1]));
tempKey.setDay(Integer.parseInt(parts[2]));
// Parse the temperature (index 8); it goes into the key for the
// secondary sort and into the value for the reducer
int temperature = Integer.parseInt(parts[8]);
tempKey.setTemp(temperature);
temp.set(temperature);
context.write(tempKey, temp);
} catch (NumberFormatException e) {
context.getCounter("MAPPER", "PARSE_ERRORS").increment(1);
}
} else {
context.getCounter("MAPPER", "SKIPPED_RECORDS").increment(1);
}
}
}
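
The mapper assumes whitespace-delimited records where fields 0-2 are year, month, and day, and field 8 is the temperature in tenths of a degree. A tiny sketch of that assumed layout (the record itself is hypothetical):

public class MapperInputCheck {
    public static void main(String[] args) {
        // Hypothetical 11-field record; only indices 0-2 and 8 matter here
        String line = "2018 01 31 0 0 0 0 0 180 0 0";
        String[] parts = line.split("\\s+");
        System.out.println(parts.length >= 11 && !parts[8].equals("-9999")); // true
        System.out.println(parts[0] + "-" + parts[1] + "-" + parts[2]
                + " temp=" + parts[8]); // 2018-01-31 temp=180
    }
}
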
// File: TopNClient.java
package com.temptopn.api;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class TopNClient {
public static void main(String[] args) throws Exception {
if (args.length != 2) {
System.err.println("Usage: TopNClient <input path> <output path>");
System.exit(1);
}
Configuration conf = new Configuration();
// Read the input directory recursively
conf.set("mapreduce.input.fileinputformat.input.dir.recursive", "true");
Job job = Job.getInstance(conf, "Station599970_Top2_Temperature");
job.setJarByClass(TopNClient.class);
// Input path (subdirectories are included via the setting above)
FileInputFormat.addInputPath(job, new Path(args[0]));
// Delete the output directory if it already exists
Path outputPath = new Path(args[1]);
FileSystem fs = outputPath.getFileSystem(conf);
if (fs.exists(outputPath)) {
fs.delete(outputPath, true);
System.out.println("已删除已存在的输出目录: " + args[1]);
}
FileOutputFormat.setOutputPath(job, outputPath);
// Mapper setup
job.setMapperClass(TopNMapper.class);
job.setMapOutputKeyClass(TempKey.class);
job.setMapOutputValueClass(IntWritable.class);
// Shuffle configuration (secondary sort)
job.setPartitionerClass(TempPartitioner.class);
job.setSortComparatorClass(TempSortComparator.class);
job.setGroupingComparatorClass(TempGroupingComparator.class);
// Reducer setup
job.setReducerClass(TopNReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
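
With the classes above packaged into a jar (the name temptopn.jar is an assumption), the job runs with the standard launcher: hadoop jar temptopn.jar com.temptopn.api.TopNClient &lt;input path&gt; &lt;output path&gt;.
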
// File: TempSortComparator.java
package com.temptopn.api;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
public class TempSortComparator extends WritableComparator {
protected TempSortComparator() {
super(TempKey.class, true);
}
@Override
public int compare(WritableComparable a, WritableComparable b) {
TempKey k1 = (TempKey) a;
TempKey k2 = (TempKey) b;
int c1 = Integer.compare(k1.getYear(), k2.getYear());
if (c1 == 0) {
int c2 = Integer.compare(k1.getMonth(), k2.getMonth());
if (c2 == 0) {
return Integer.compare(k2.getTemp(), k1.getTemp()); // temperature descending
}
return c2;
}
return c1;
}
}
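
To see the comparator's effect, a minimal sketch (the class name is assumed; it sits in com.temptopn.api so the protected constructor is accessible):

package com.temptopn.api;

public class SortOrderCheck {
    public static void main(String[] args) {
        TempKey cooler = makeKey(2018, 1, 30, 170);
        TempKey hotter = makeKey(2018, 1, 31, 180);
        // Negative result: within the same month, the hotter key sorts first
        System.out.println(new TempSortComparator().compare(hotter, cooler));
    }

    private static TempKey makeKey(int year, int month, int day, int temp) {
        TempKey k = new TempKey();
        k.setYear(year);
        k.setMonth(month);
        k.setDay(day);
        k.setTemp(temp);
        return k;
    }
}
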
// File: TempPartitioner.java
package com.temptopn.api;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Partitioner;
public class TempPartitioner extends Partitioner<TempKey, IntWritable> {
@Override
public int getPartition(TempKey key, IntWritable value, int numPartitions) {
// The mask keeps the hash non-negative; all keys of one (year, month)
// group land in the same partition
return ((key.getYear() * 12 + key.getMonth()) & Integer.MAX_VALUE) % numPartitions;
}
}
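
For example, with 4 reducers every June 2018 key maps to partition (2018 × 12 + 6) % 4 = 24222 % 4 = 2, so a month's records are never split across reducers and the reducer-side top-2 stays correct.
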
// File: TempKey.java
package com.temptopn.api;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
public class TempKey implements WritableComparable<TempKey> {
private int year;
private int month;
private int day;
private int temp;
private String stationId;
// Getters and Setters
public int getYear() { return year; }
public void setYear(int year) { this.year = year; }
public int getMonth() { return month; }
public void setMonth(int month) { this.month = month; }
public int getDay() { return day; }
public void setDay(int day) { this.day = day; }
public int getTemp() { return temp; }
public void setTemp(int temp) { this.temp = temp; }
public String getStationId() { return stationId; }
public void setStationId(String stationId) { this.stationId = stationId; }
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(year);
out.writeInt(month);
out.writeInt(day);
out.writeInt(temp);
out.writeUTF(stationId);
}
@Override
public void readFields(DataInput in) throws IOException {
year = in.readInt();
month = in.readInt();
day = in.readInt();
temp = in.readInt();
stationId = in.readUTF();
}
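// Natural ordering by (year, month, day). The driver installs custom sort
// and grouping comparators, so this only serves as a fallback.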
@Override
public int compareTo(TempKey that) {
int c1 = Integer.compare(this.year, that.getYear());
if (c1 == 0) {
int c2 = Integer.compare(this.month, that.getMonth());
return (c2 == 0) ? Integer.compare(this.day, that.getDay()) : c2;
}
return c1;
}
}
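
Writable serialization bugs are easy to introduce when fields change, so a quick round-trip check helps; a minimal sketch (the class name is an assumption, not part of the job):

package com.temptopn.api;

import java.io.*;

public class TempKeyRoundTrip {
    public static void main(String[] args) throws IOException {
        TempKey in = new TempKey();
        in.setYear(2018);
        in.setMonth(5);
        in.setDay(31);
        in.setTemp(280);
        in.setStationId("599970");

        // Serialize, then deserialize into a fresh instance
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        in.write(new DataOutputStream(buf));
        TempKey out = new TempKey();
        out.readFields(new DataInputStream(new ByteArrayInputStream(buf.toByteArray())));

        System.out.println(out.getYear() + "-" + out.getMonth() + "-" + out.getDay()
                + " " + out.getStationId() + " " + out.getTemp()); // 2018-5-31 599970 280
    }
}
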
// File: TempGroupingComparator.java
package com.temptopn.api;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
public class TempGroupingComparator extends WritableComparator {
protected TempGroupingComparator() {
super(TempKey.class, true);
}
@Override
public int compare(WritableComparable a, WritableComparable b) {
TempKey k1 = (TempKey) a;
TempKey k2 = (TempKey) b;
int c1 = Integer.compare(k1.getYear(), k2.getYear());
return (c1 == 0) ? Integer.compare(k1.getMonth(), k2.getMonth()) : c1;
}
}
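
Taken together, TempPartitioner, TempSortComparator, and TempGroupingComparator implement the classic secondary-sort pattern: the partitioner keeps each (year, month) on one reducer, the sort comparator orders that month's records by temperature descending, and the grouping comparator folds them into a single reduce() call, so the reducer sees each month's values hottest-first.
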
// File: TemperatureVisualization.java
package com.temptopn.api;
import javafx.application.Application;
import javafx.scene.Scene;
import javafx.scene.chart.LineChart;
import javafx.scene.chart.NumberAxis;
import javafx.scene.chart.XYChart;
import javafx.stage.Stage;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
public class TemperatureVisualization extends Application {
// Simple holder for one day's temperature reading
static class TemperatureRecord {
LocalDate date;
int temperature;
public TemperatureRecord(LocalDate date, int temperature) {
this.date = date;
this.temperature = temperature;
}
}
@Override
public void start(Stage primaryStage) {
// Prepare the data
List<TemperatureRecord> records = prepareData();
// Create the axes
NumberAxis xAxis = new NumberAxis();
xAxis.setLabel("Month");
xAxis.setAutoRanging(false);
xAxis.setLowerBound(1);
xAxis.setUpperBound(12);
xAxis.setTickUnit(1);
NumberAxis yAxis = new NumberAxis();
yAxis.setLabel("Temperature (×0.1℃)");
// Create the line chart
LineChart<Number, Number> lineChart = new LineChart<>(xAxis, yAxis);
lineChart.setTitle("Station 599970 Temperature Trend (2018)");
// Data series
XYChart.Series<Number, Number> dailySeries = new XYChart.Series<>();
dailySeries.setName("Daily max temperature");
XYChart.Series<Number, Number> monthlyMaxSeries = new XYChart.Series<>();
monthlyMaxSeries.setName("Monthly max temperature");
// Track each month's maximum temperature
double[] monthlyMax = new double[12];
int[] monthlyCount = new int[12];
for (TemperatureRecord record : records) {
int month = record.date.getMonthValue();
double temp = record.temperature;
// Plot the daily point; the fractional offset spreads days across
// the month without spilling into the next one
dailySeries.getData().add(new XYChart.Data<>(
month + ((record.date.getDayOfMonth() - 1) / 31.0),
temp
));
// Update the monthly maximum; the count check lets the first reading
// win even when temperatures are negative
if (monthlyCount[month-1] == 0 || temp > monthlyMax[month-1]) {
monthlyMax[month-1] = temp;
}
monthlyCount[month-1]++;
}
// Add the monthly-max trend line
for (int i = 0; i < 12; i++) {
if (monthlyCount[i] > 0) {
monthlyMaxSeries.getData().add(new XYChart.Data<>(i + 1, monthlyMax[i]));
}
}
lineChart.getData().addAll(dailySeries, monthlyMaxSeries);
// Set up the scene and show the window
Scene scene = new Scene(lineChart, 800, 600);
primaryStage.setScene(scene);
primaryStage.show();
}
private List<TemperatureRecord> prepareData() {
String data = "2018-01-31\t站点:599970 温度:180\n" +
"2018-01-30\t站点:599970 温度:170\n" +
"2018-02-28\t站点:599970 温度:200\n" +
"2018-02-27\t站点:599970 温度:260\n" +
"2018-03-31\t站点:599970 温度:180\n" +
"2018-03-30\t站点:599970 温度:180\n" +
"2018-04-30\t站点:599970 温度:190\n" +
"2018-04-29\t站点:599970 温度:200\n" +
"2018-05-31\t站点:599970 温度:280\n" +
"2018-05-30\t站点:599970 温度:210\n" +
"2018-06-30\t站点:599970 温度:120\n" +
"2018-06-29\t站点:599970 温度:160\n" +
"2018-07-31\t站点:599970 温度:130\n" +
"2018-07-30\t站点:599970 温度:150\n" +
"2018-08-31\t站点:599970 温度:200\n" +
"2018-08-30\t站点:599970 温度:200\n" +
"2018-09-30\t站点:599970 温度:180\n" +
"2018-09-29\t站点:599970 温度:210\n" +
"2018-10-31\t站点:599970 温度:220\n" +
"2018-10-30\t站点:599970 温度:180\n" +
"2018-11-30\t站点:599970 温度:290\n" +
"2018-11-29\t站点:599970 温度:130\n" +
"2018-12-31\t站点:599970 温度:300\n" +
"2018-12-30\t站点:599970 温度:180";
List<TemperatureRecord> records = new ArrayList<>();
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
for (String line : data.split("\n")) {
String[] parts = line.split("\t");
LocalDate date = LocalDate.parse(parts[0], formatter);
int temp = Integer.parseInt(parts[1].split("Temp:")[1]);
records.add(new TemperatureRecord(date, temp));
}
return records;
}
public static void main(String[] args) {
launch(args);
}
}
Convert this to Hive code:
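
A minimal HiveQL sketch of the same top-2-per-month logic, assuming the parsed records are loaded into a table temps(year INT, month INT, day INT, temp INT); the table and column names are assumptions:

-- Two hottest days per (year, month); temp is in tenths of a degree and
-- -9999 marks a missing reading, matching the MapReduce job above.
SELECT year, month, day, max_temp
FROM (
  SELECT year, month, day, max_temp,
         ROW_NUMBER() OVER (PARTITION BY year, month
                            ORDER BY max_temp DESC) AS rn
  FROM (
    SELECT year, month, day, MAX(temp) AS max_temp
    FROM temps
    WHERE temp <> -9999
    GROUP BY year, month, day
  ) daily
) ranked
WHERE rn <= 2;
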