对输入文件中的数据进行计算,求出每个学生的平均成绩。输入文件中的每行内容均为一个学生的姓名和他相应的成绩,如果有多门学科,则每门学科为一个文件。要求在输出中每行有两个间隔的数据,其中,第一个代表学生的姓名,第二个代表其平均成绩。
package bin;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class AverageScore {
/**
* map类,继承mapper接口,实现其中的map()抽象方法
*/
static class AverScorMapper extends Mapper<LongWritable, Text, Text, IntWritable>{
public void map(LongWritable key,Text value,Context context) {
String line= value.toString();
StringTokenizer tokenizer =new StringTokenizer(line, "\n");//整个txt文件以换行符分割成为一个大的StringTokenizer
while (tokenizer.hasMoreElements()) {//对于每一行用StringTokenizer再次做了细分,分割用的另一个StringTokenizer的构造函数一个参数的构造函数,是以“ ”空格作为切分点的
StringTokenizer tokenizerline = new StringTokenizer(tokenizer.nextToken());
String strName=tokenizerline.nextToken();
String strScore=tokenizerline.nextToken();
Text name=new Text(strName);
int scoreInt=Integer.parseInt(strScore);
try {
context.write(name, new IntWritable(scoreInt));
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
}
/**
* reduce类,继承reducer接口,实现其中的reduce()抽象方法
*/
static class AverScorReducer extends Reducer<Text, IntWritable, Text, IntWritable>{
public void reduce(Text key,Iterable<IntWritable> values,Context context) {
int sum=0;
int count=0;
for (IntWritable value : values) {
sum += value.get();
count++;
}
int average=(int)(sum/count);
try {
context.write(key, new IntWritable(average));
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
/**
* 主函数
* @param args
* @throws IOException
* @throws InterruptedException
* @throws ClassNotFoundException
*/
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
// TODO Auto-generated method stub
Configuration configuration=new Configuration();
String[] otherArgs=new GenericOptionsParser(configuration, args).getRemainingArgs();
if (otherArgs.length !=2) {
System.err.println("Usage: AverageScore <in> <out>");
System.exit(2);
}
Job job =new Job(configuration, "tracert AverageScore");
job.setJarByClass(AverageScore.class);
job.setMapperClass(AverScorMapper.class);
job.setReducerClass(AverScorReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true)? 0 : 1);
}
}