import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskCounter;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
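/*
 * Salave: computes the average salary per department with one MapReduce job.
 * The input is assumed to be SQL*Plus-style dumps of the classic DEPT and EMP
 * tables (an assumption inferred from the header/separator rows the mapper skips):
 *
 *   DEPTNO DNAME      LOC
 *   ------ ---------- --------
 *       20 RESEARCH   DALLAS
 *
 *   EMPNO ENAME  JOB      MGR  HIREDATE  SAL  COMM DEPTNO
 *   ----- ------ -------- ---- --------- ---- ---- ------
 *   7499  ALLEN  SALESMAN 7698 20-FEB-81 1600 300  30
 *
 * Each output line is: DNAME <average salary> <employee count>.
 */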
public class Salave extends Configured implements Tool {
	// Counts input lines that were skipped (headers, separators, blanks, unrecognized rows)
	enum Counter {
		LINESKIP
	}
	public static class Map extends Mapper<LongWritable, Text, Text, Text> {
		@Override
		public void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			// Collapse each run of whitespace into a single comma so the
			// column-aligned SQL*Plus output can be split on ","
			String line = value.toString().trim();
			line = line.replaceAll("\\s+", ",");
			// Skip the echoed query, column headers, separator rows, and blank lines
			if (line.contains("select") || line.contains("DEPTNO")
					|| line.contains("----") || line.isEmpty()) {
				context.getCounter(Counter.LINESKIP).increment(1);
				return;
			}
			try {
				String[] lineSplit = line.split(",");
				if (lineSplit.length == 3) {
					// DEPT row: DEPTNO, DNAME, LOC
					String deptno = lineSplit[0];
					String dname = lineSplit[1];
					// Tag the name with a leading "d" so the reducer can tell it from a salary
					context.write(new Text(deptno), new Text("d" + dname));
				} else if (lineSplit.length == 8) {
					// Full EMP row: EMPNO, ENAME, JOB, MGR, HIREDATE, SAL, COMM, DEPTNO
					String deptno = lineSplit[lineSplit.length - 1];
					String sal = lineSplit[lineSplit.length - 3];
					context.write(new Text(deptno), new Text(sal));
				} else if (lineSplit.length == 7 || lineSplit.length == 6) {
					// EMP row with COMM (and possibly MGR) null: SAL sits second from the end
					String deptno = lineSplit[lineSplit.length - 1];
					String sal = lineSplit[lineSplit.length - 2];
					context.write(new Text(deptno), new Text(sal));
				} else {
					// Unrecognized row shape (e.g. a LOC containing a space): count it as skipped
					context.getCounter(Counter.LINESKIP).increment(1);
				}
			} catch (ArrayIndexOutOfBoundsException e) {
				// Defensive: count any malformed row rather than failing the task
				context.getCounter(Counter.LINESKIP).increment(1);
			}
		}
	}
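	/*
	 * For illustration (values assumed from the sample tables above): the DEPT row
	 * "20 RESEARCH DALLAS" becomes key "20", value "dRESEARCH", while the EMP row
	 * "7499 ALLEN SALESMAN 7698 20-FEB-81 1600 300 30" becomes key "30", value "1600".
	 * The "d" prefix is what lets the reducer separate department names from salaries.
	 */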
	public static class Reduce extends Reducer<Text, Text, Text, Text> {
		@Override
		public void reduce(Text key, Iterable<Text> values, Context context)
				throws IOException, InterruptedException {
			int sumsal = 0;    // salary total for this department
			int num = 0;       // number of employees seen
			String dname = "";
			for (Text value : values) {
				String valueString = value.toString();
				if (valueString.isEmpty()) {
					continue;
				}
				if (valueString.charAt(0) == 'd') {
					// Values tagged with a leading "d" carry the department name
					dname = valueString.substring(1);
				} else {
					// Everything else is a salary (integers in the sample data)
					sumsal += Integer.parseInt(valueString);
					num++;
				}
			}
			// Guard against division by zero for a department with no employees
			String avg = (num == 0) ? "0" : Integer.toString(sumsal / num);
			context.write(new Text(dname), new Text(avg + " " + num));
		}
	}
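	/*
	 * With the classic scott/tiger data (an assumption about the input), department 10
	 * would reduce to the line "ACCOUNTING<TAB>2916 3": salaries 2450 + 5000 + 1300 = 8750,
	 * and 8750 / 3 = 2916 under integer division, over 3 employees.
	 */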
	@Override
	public int run(String[] args) throws Exception {
		Configuration conf = getConf();
		Job job = Job.getInstance(conf, "Salave");
		job.setJarByClass(Salave.class);
		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		job.setMapperClass(Map.class);
		job.setReducerClass(Reduce.class);
		job.setOutputFormatClass(TextOutputFormat.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		job.waitForCompletion(true);
		System.out.println("job name   " + job.getJobName());
		System.out.println("successful " + (job.isSuccessful() ? "yes" : "no"));
		// Read the built-in task counters through the TaskCounter enum; the old
		// "org.apache.hadoop.mapred.Task$Counter" group name no longer resolves
		// under the mapreduce API
		System.out.println("input records  "
				+ job.getCounters().findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue());
		System.out.println("output records "
				+ job.getCounters().findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue());
		System.out.println("skipped lines  "
				+ job.getCounters().findCounter(Counter.LINESKIP).getValue());
		return job.isSuccessful() ? 0 : 1;
	}
	public static void main(String[] args) throws Exception {
		if (args.length != 2) {
			System.err.println();
			System.err.println("Usage: Salave <input path> <output path>");
			System.err.println("Example: hadoop jar ~/Test_2.jar hdfs://localhost:9000/home/james/Test_2 hdfs://localhost:9000/home/james/output");
			System.err.println("Counter:");
			System.err.println("\tLINESKIP\tInput lines that were skipped");
			System.exit(-1);
		}
		DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
		Date start = new Date();
		int res = ToolRunner.run(new Configuration(), new Salave(), args);
		Date end = new Date();
		// Elapsed wall-clock time in minutes
		float time = (end.getTime() - start.getTime()) / 60000.0f;
		System.out.println("start " + formatter.format(start));
		System.out.println("end   " + formatter.format(end));
		System.out.println("time  " + time + " min");
		System.exit(res);
	}
}
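To build and run the job, a minimal sketch (the jar name and HDFS paths follow the example in the usage message; adjust them to your cluster):

  javac -classpath $(hadoop classpath) Salave.java
  jar cvf Test_2.jar Salave*.class
  hadoop jar Test_2.jar Salave hdfs://localhost:9000/home/james/Test_2 hdfs://localhost:9000/home/james/output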