package hadoop.test.csc;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class MaxTemeratureMapper extends Configured implements Tool {
/**
* 计数器
* 用于计数各种异常数据
*/
enum Counter
{
LINESKIP, //出错的行
}
@Override
public int run(String[] args) throws Exception {
Configuration conf = getConf();
Job job = new Job(conf, "MaxTemeratureMapper"); // 任务名
job.setJarByClass(MaxTemeratureMapper.class); // 指定Class
FileInputFormat.addInputPath(job, new Path(args[0])); // 输入路径
FileOutputFormat.setOutputPath(job, new Path(args[1])); // 输出路径
job.setMapperClass(Map.class); // 调用上面Map类作为Map任务代码
job.setCombinerClass(Reduce.class);
job.setReducerClass(Reduce.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setOutputKeyClass(Text.class); // 指定输出的KEY的格式
job.setOutputValueClass(IntWritable.class); // 指定输出的VALUE的格式
job.waitForCompletion(true);
// 输出任务完成情况
System.out.println("任务名称:" + job.getJobName());
System.out.println("任务成功:" + (job.isSuccessful() ? "是" : "否"));
System.out.println("输入行数:"
+ job.getCounters()
.findCounter("org.apache.hadoop.mapred.Task$Counter",
"MAP_INPUT_RECORDS").getValue());
System.out.println("输出行数:"
+ job.getCounters()
.findCounter("org.apache.hadoop.mapred.Task$Counter",
"MAP_OUTPUT_RECORDS").getValue());
System.out.println("跳过的行:"
+ job.getCounters().findCounter(Counter.LINESKIP).getValue());
return job.isSuccessful() ? 0 : 1;
}
/**
* MAP任务
*/
public static class Map extends
Mapper<LongWritable, Text, Text, IntWritable> {
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String line = value.toString(); // 读取源数据
try {
// 数据处理
String year = line.substring(15, 19);
System.out.println(year);
Text tYear = new Text(year); //
int temperature = Integer.parseInt(line.substring(88, 93));
IntWritable iTemperature = new IntWritable(temperature);
context.write(tYear, iTemperature);
} catch (java.lang.ArrayIndexOutOfBoundsException e) {
context.getCounter(Counter.LINESKIP).increment(1); // 出错令计数器+1
return;
}
}
}
/**
* Reduce任务
*/
public static class Reduce extends
Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text key, Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {
int maxValue = Integer.MIN_VALUE;
IntWritable result = new IntWritable();
for (IntWritable var : values) {
maxValue = Math.max(maxValue, var.get());
}
result.set(maxValue);
context.write(key, result);
}
}
/**
* 设置系统说明
* 设置MapReduce任务
*/
public static void main(String[] args) throws Exception
{
//判断参数个数是否正确
//如果无参数运行则显示以作程序说明
if ( args.length != 2 )
{
System.err.println("");
System.err.println("Usage: MaxTemeratureMapper< input path > < output path > ");
System.err.println("Example: hadoop jar ~/MaxTemeratureMapper.jar hdfs://10.3.19.199:9000/user/hadoop/TempratureData/ hdfs://10.3.19.199:9000/user/hadoop/TempratureData/Temerature_1_output");
System.err.println("Counter:");
System.err.println("\t"+"LINESKIP"+"\t"+"Lines which are too short");
System.exit(-1);
}
//记录开始时间
DateFormat formatter = new SimpleDateFormat( "yyyy-MM-dd HH:mm:ss" );
Date start = new Date();
//运行任务
int res = ToolRunner.run(new Configuration(), new MaxTemeratureMapper(), args);
//输出任务耗时
Date end = new Date();
float time = (float) (( end.getTime() - start.getTime() ) / 60000.0) ;
System.out.println( "任务开始:" + formatter.format(start) );
System.out.println( "任务结束:" + formatter.format(end) );
System.out.println( "任务耗时:" + String.valueOf( time ) + " 分钟" );
System.exit(res);
}
}
气象数据分析代码
最新推荐文章于 2025-06-09 08:40:59 发布