1. The Mapper: MaxMinMapper.java

package com.simple.mr;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MaxMinMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    // A single fixed key, so that every value is routed to the same Reducer
    private final Text outputKey = new Text("MaxMin");
    private LongWritable number = new LongWritable();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // 1. Convert the Text value (one line of input) to a String
        String line = value.toString();
        // 2. Trim leading and trailing whitespace
        line = line.trim();
        // 3. Skip empty lines
        if (!line.isEmpty()) {
            try {
                // 4. Parse the string as a long
                long num = Long.parseLong(line);
                // 5. Store the parsed number in the reusable Writable
                number.set(num);
                // 6. Emit the fixed key ("MaxMin") with the number,
                //    e.g. ("MaxMin", 10), ("MaxMin", 25), ...
                context.write(outputKey, number);
            } catch (NumberFormatException e) {
                // If a line is not a number, ignore it (or log it as an error)
                System.err.println("Ignoring invalid number: " + line);
            }
        }
    }
}
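The catch block above just prints to stderr, which is easy to lose on a cluster. A sketch of an alternative is to use a Hadoop counter, so the total number of malformed lines shows up in the job's counter report; the group and counter names here are illustrative, and the snippet is a drop-in replacement for the catch block above:

            } catch (NumberFormatException e) {
                // Count malformed lines instead of printing them; the total
                // appears under the "MaxMinMapper" counter group in the job
                // summary after completion.
                context.getCounter("MaxMinMapper", "MALFORMED_LINES").increment(1);
            }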
2. The Reducer: MaxMinReducer.java

package com.simple.mr;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class MaxMinReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    // Reusable Writables for the two results
    private LongWritable maxValue = new LongWritable();
    private LongWritable minValue = new LongWritable();

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        // Seed max with Long.MIN_VALUE and min with Long.MAX_VALUE so any
        // real value replaces them. This is safe because Hadoop only calls
        // reduce() for keys that have at least one value, so the Iterable
        // is never empty here.
        long max = Long.MIN_VALUE;
        long min = Long.MAX_VALUE;
        // Scan every number that was sent to this key
        for (LongWritable val : values) {
            long currentValue = val.get();
            // Update the running maximum
            if (currentValue > max) {
                max = currentValue;
            }
            // Update the running minimum
            if (currentValue < min) {
                min = currentValue;
            }
        }
        // Store the final results
        maxValue.set(max);
        minValue.set(min);
        // Emit two records, e.g. ("Max", 100) and ("Min", 3)
        context.write(new Text("Max"), maxValue);
        context.write(new Text("Min"), minValue);
    }
}
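Because this Reducer rekeys its output to "Max" and "Min", it cannot double as a combiner: combiner output must reach the Reducer under the original map-output key. A separate combiner can still cut shuffle traffic by forwarding only each map task's local extremes. The sketch below is an addition to the original code, and the class name MaxMinCombiner is an assumption:

package com.simple.mr;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical combiner: runs on each map task's output and forwards only
// the local max and min, instead of every parsed number.
public class MaxMinCombiner extends Reducer<Text, LongWritable, Text, LongWritable> {

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long max = Long.MIN_VALUE;
        long min = Long.MAX_VALUE;
        for (LongWritable val : values) {
            long v = val.get();
            if (v > max) max = v;
            if (v < min) min = v;
        }
        // Keep the original "MaxMin" key so the single Reducer still
        // receives everything as one group.
        context.write(key, new LongWritable(max));
        context.write(key, new LongWritable(min));
    }
}

To enable it, add job.setCombinerClass(MaxMinCombiner.class); to the Driver below. This is correct here because taking the max/min of local maxes/mins still yields the global extremes, even though Hadoop may run the combiner zero or several times.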
3. The Driver: MaxMinDriver.java

package com.simple.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MaxMinDriver {

    public static void main(String[] args) throws Exception {
        // 1. Load the configuration and create a Job instance
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "Max Min Number");
        // 2. Tell Hadoop which jar contains this job. This is needed when
        //    the job is packaged as a jar and submitted to a cluster; when
        //    run from an IDE such as Eclipse, the jar containing this class
        //    is located automatically.
        job.setJarByClass(MaxMinDriver.class);
        // 3. Set the Mapper and Reducer classes for this job
        job.setMapperClass(MaxMinMapper.class);
        job.setReducerClass(MaxMinReducer.class);
        // 4. Set the key/value types of the Mapper output
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        // 5. Set the key/value types of the final output (the Reducer's types)
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // 6. Input directory, taken from the first command-line argument
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // 7. Output directory, taken from the second command-line argument
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // 8. Use a single Reducer so one task sees all values and can
        //    compute the global max and min
        job.setNumReduceTasks(1);
        // 9. Submit the job and wait for it to finish
        boolean result = job.waitForCompletion(true);
        // 10. Exit with a status code reflecting success or failure
        System.exit(result ? 0 : 1);
    }
}
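One practical caveat: FileOutputFormat requires that the output directory not exist yet, otherwise the job fails at submission. When rerunning the job, delete the previous results first:

hadoop fs -rm -r /output/maxmin_result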
4. Running the job

The Driver takes two command-line arguments, the input directory and the output directory:

/input/maxmin /output/maxmin_result

When the job completes, view the single Reducer's output file:

hadoop fs -cat /output/maxmin_result/part-r-00000
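A complete launch might look like the following; the jar name maxmin.jar is an assumption, so substitute whatever your build actually produces. The input data is expected to be plain text with one integer per line, and the sample values here mirror the examples in the code comments:

echo -e "10\n25\n3\n100" > numbers.txt
hadoop fs -mkdir -p /input/maxmin
hadoop fs -put numbers.txt /input/maxmin/
hadoop jar maxmin.jar com.simple.mr.MaxMinDriver /input/maxmin /output/maxmin_result

For that sample input, the output file would hold two tab-separated lines, one per record emitted by the Reducer:

Max	100
Min	3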
