map代码:
package com.traffic;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper that turns every CSV data row into one (column name, cell value) pair per column.
 *
 * <p>The record whose key (byte offset) is 0 is treated as the header row and cached in
 * {@link #title}. Every later row that has at least {@code COLUMN_COUNT} cells emits its first
 * {@code COLUMN_COUNT} cells, each keyed by the header cell at the same index.
 *
 * <p>NOTE(review): the header is only known to a map task that itself receives the offset-0
 * record. A task working on a later split of the same file never sees it and therefore emits
 * nothing for its rows — confirm the input is small enough to be a single split, or load the
 * header some other way.
 */
public class MyTrafficMapper extends Mapper<LongWritable,Text,Text,Text> {
    /** Number of leading columns emitted for every data row. */
    private static final int COLUMN_COUNT = 44;

    /** Header cells; entries stay {@code null} until this task has seen the header row. */
    String[] title = new String[COLUMN_COUNT];

    /** Reused output holders; {@code context.write} is called once per cell. */
    private final Text outKey = new Text();
    private final Text outValue = new Text();

    /**
     * Caches the header row, or emits (header, cell) pairs for a data row.
     *
     * @param key     byte offset of the line within the input
     * @param value   one raw CSV line
     * @param context sink for the emitted pairs
     * @throws IOException          if writing an output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // A limit of -1 keeps trailing empty cells. Plain split(",") drops them, which made rows
        // ending in blank columns look too short and silently disappear from the statistics.
        String[] line = value.toString().split(",", -1);

        // Offset 0 marks the header line.
        if (key.get() == 0L) {
            title = line;
            return;
        }

        // Rows with fewer cells than expected are skipped.
        if (line.length < COLUMN_COUNT) {
            return;
        }

        // Never read past the header actually seen (it may be shorter than COLUMN_COUNT).
        int columns = Math.min(COLUMN_COUNT, title.length);
        for (int i = 0; i < columns; i++) {
            if (title[i] == null) {
                // Header not known to this task: there is no column name to key the cell by.
                continue;
            }
            outKey.set(title[i]);
            outValue.set(line[i]);
            // I/O failures propagate so the framework can fail or retry the task instead of
            // the record being lost behind a printed stack trace.
            context.write(outKey, outValue);
        }
    }
}
reduce代码:
package com.traffic;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer that summarises one column: how many cells were empty, plus the minimum and maximum
 * of the cells that are plain non-negative decimal numbers.
 *
 * <p>Output value format: {@code emptyCount \t min \t max}. When the column holds no numeric
 * cell, min and max are both written as the placeholder {@code 无}.
 */
public class MyTrafficReduce extends Reducer<Text,Text,Text,Text> {
    /**
     * Unsigned integer or decimal, e.g. {@code 7}, {@code 42}, {@code 3.14}. Compiled once.
     * The previous pattern required at least two digits, so single-digit values were ignored.
     */
    private static final Pattern NUMERIC = Pattern.compile("[0-9]+(\\.[0-9]+)?");

    /**
     * Computes the empty-cell count and the numeric min/max over all values of one column.
     *
     * @param key     column name
     * @param values  every cell emitted for that column
     * @param context sink for the single summary record
     * @throws IOException          if writing the summary fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        float min = Float.MAX_VALUE;
        // Float.MIN_VALUE is the smallest POSITIVE float, not the most negative one; starting
        // from it reported a maximum of 1.4E-45 for a column whose values are all zero.
        float max = -Float.MAX_VALUE;
        int emptyCount = 0;
        // Explicit flag instead of comparing min against its initial sentinel.
        boolean sawNumber = false;

        for (Text value : values) {
            String cell = value.toString();
            if (cell.isEmpty()) {
                emptyCount++;
                continue;
            }
            Matcher numeric = NUMERIC.matcher(cell);
            if (!numeric.matches()) {
                // Non-numeric text contributes to neither the count nor min/max.
                continue;
            }
            float data = Float.parseFloat(cell);
            sawNumber = true;
            if (data > max) {
                max = data;
            }
            if (data < min) {
                min = data;
            }
        }

        if (sawNumber) {
            context.write(key, new Text(emptyCount + "\t" + min + "\t" + max));
        } else {
            context.write(key, new Text(emptyCount + "\t" + "无\t无"));
        }
    }
}
主类:
package com.traffic;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver that wires {@link MyTrafficMapper} and {@link MyTrafficReduce} into a single job,
 * removes any previous output directory, runs the job and exits with 0 on success or 1 on
 * failure.
 */
public class MyTrafficDrive {
    /** Directory the job reads its input from. */
    private static final String INPUT_DIR = "E:\\data1";
    /** Directory the job writes to; it is deleted recursively before every run. */
    private static final String OUTPUT_DIR = "E:\\data1\\out";

    /**
     * Configures and runs the job, then terminates the JVM with the job's status.
     *
     * @param args unused
     * @throws IOException            if job setup or output cleanup fails
     * @throws ClassNotFoundException if a configured job class cannot be loaded
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Hand the configuration to the job. The no-arg Job.getInstance() built its own,
        // leaving the object created above unused by the job.
        Job job = Job.getInstance(conf);
        job.setJarByClass(MyTrafficDrive.class);
        job.setMapperClass(MyTrafficMapper.class);
        job.setReducerClass(MyTrafficReduce.class);
        configureOutputTypes(job);

        Path output = configurePaths(job);
        // Delete any previous output directory before the job runs; resolve the file system
        // through the job's own configuration so cleanup and job agree.
        output.getFileSystem(job.getConfiguration()).delete(output, true);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /** Registers the input and output directories on the job and returns the output path. */
    private static Path configurePaths(Job job) throws IOException {
        FileInputFormat.addInputPath(job, new Path(INPUT_DIR));
        Path output = new Path(OUTPUT_DIR);
        FileOutputFormat.setOutputPath(job, output);
        return output;
    }

    /** Declares Text/Text for both the map output and the final output. */
    private static void configureOutputTypes(Job job) {
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
    }
}