使用 Hadoop MapReduce 求各部门员工的平均工资和人数

批量数据处理与分析:Hadoop与MapReduce的应用实例
本文介绍了如何使用Hadoop和MapReduce进行大规模数据处理与分析,包括数据导入、数据清洗、数据转换、数据分析等多个步骤。通过实例展示了如何有效地处理和分析大数据集,从而提取有价值的信息和洞察。
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * MapReduce job that groups department rows and employee rows by department
 * number and writes, for each department, the department name followed by the
 * average salary and the number of salary records seen.
 *
 * <p>Input lines are whitespace-separated text. Lines containing
 * {@code select}, {@code DEPTNO} or {@code ----}, and blank lines, are treated
 * as non-data and skipped (presumably the output of a SQL client; confirm
 * against the actual input files).
 */
public class Salave extends Configured implements Tool {
    /** Job-specific counters. */
    enum Counter {
        /** Number of input lines the mapper skipped instead of emitting. */
        LINESKIP,
    }

    /**
     * Emits {@code (deptno, "d" + dname)} for 3-field rows and
     * {@code (deptno, salary)} for 6-, 7- and 8-field rows. Everything else is
     * counted in {@link Counter#LINESKIP}.
     */
    public static class Map extends Mapper<LongWritable, Text, Text, Text> {
        /** Prefix that marks a value as a department name rather than a salary. */
        private static final String DEPT_NAME_MARKER = "d";

        /**
         * Classifies one input line by its field count and emits the
         * department-keyed record for it.
         *
         * @param key     byte offset of the line (unused)
         * @param value   the raw input line
         * @param context task context used for output and counters
         */
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString().trim();
            // Collapse separator runs into a single comma. NOTE(review): the
            // character class also matches apostrophes, not only spaces.
            line = line.replaceAll("[' ']+", ",");

            boolean isNonData = line.contains("select") || line.contains("DEPTNO")
                    || line.contains("----") || line.equals("");
            if (isNonData) {
                context.getCounter(Counter.LINESKIP).increment(1);
                return;
            }

            String[] fields = line.split(",");
            int count = fields.length;

            if (count == 3) {
                // Department row: first field is the number, second the name.
                context.write(new Text(fields[0]),
                        new Text(DEPT_NAME_MARKER + fields[1]));
                return;
            }

            String salary;
            if (count == 8) {
                // Full row: salary is third from the end.
                salary = fields[count - 3];
            } else if (count == 7 || count == 6) {
                // Shorter row: salary sits directly before the department number.
                salary = fields[count - 2];
            } else {
                // Unexpected shape: count it instead of dropping it silently.
                context.getCounter(Counter.LINESKIP).increment(1);
                return;
            }

            if (!isWholeNumber(salary)) {
                // A non-numeric salary would otherwise fail the reducer.
                context.getCounter(Counter.LINESKIP).increment(1);
                return;
            }
            context.write(new Text(fields[count - 1]), new Text(salary));
        }

        /** Returns true when {@code token} parses as a {@code long}. */
        private static boolean isWholeNumber(String token) {
            try {
                Long.parseLong(token);
                return true;
            } catch (NumberFormatException notNumeric) {
                return false;
            }
        }
    }

    /**
     * Folds all values of one department into
     * {@code (dname, average + "  " + count)}.
     */
    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        /**
         * Sums the salary values, picks up the department name from the value
         * starting with {@code 'd'}, and writes the truncated integer average
         * and the record count. A department without salaries reports
         * {@code 0} as its average.
         *
         * @param key     the department number
         * @param values  salary strings and at most the marked department name
         * @param context task context used for output and counters
         */
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            long salarySum = 0L; // long so large departments cannot overflow
            int salaryCount = 0;
            String dname = "";

            for (Text value : values) {
                String text = value.toString();
                if (text.isEmpty()) {
                    continue;
                }
                if (text.charAt(0) == 'd') {
                    dname = text.substring(1);
                    continue;
                }
                try {
                    salarySum += Long.parseLong(text);
                    salaryCount += 1;
                } catch (NumberFormatException notNumeric) {
                    // Skip and count a bad value rather than failing the job.
                    context.getCounter(Counter.LINESKIP).increment(1);
                }
            }

            // Integer division: the average is truncated toward zero.
            String average = (salaryCount == 0)
                    ? "0"
                    : Long.toString(salarySum / salaryCount);
            String out = average + "  " + Integer.toString(salaryCount);
            context.write(new Text(dname), new Text(out));
        }
    }

    /**
     * Configures and runs the job, then prints the job name, success flag and
     * record counters to standard output.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 when the job succeeded, 1 otherwise
     */
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        Job job = new Job(conf, "Salave");
        job.setJarByClass(Salave.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        boolean succeeded = job.waitForCompletion(true);

        System.out.println("jobname    " + job.getJobName());
        System.out.println("issuc    " + (succeeded ? "y" : "n"));
        System.out.println("hangnumin    "
                + job.getCounters()
                        .findCounter("org.apache.hadoop.mapred.Task$Counter",
                                "MAP_INPUT_RECORDS").getValue());
        System.out.println("hangnumout    "
                + job.getCounters()
                        .findCounter("org.apache.hadoop.mapred.Task$Counter",
                                "MAP_OUTPUT_RECORDS").getValue());
        System.out.println("skiphang    "
                + job.getCounters().findCounter(Counter.LINESKIP).getValue());
        return succeeded ? 0 : 1;
    }

    /**
     * Command-line entry point: checks the argument count, runs the job via
     * {@link ToolRunner}, prints start time, end time and elapsed minutes, and
     * exits with the job's result code.
     *
     * @param args input path and output path
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("");
            System.err.println("Usage: Salave < input path > < output path > ");
            System.err
                    .println("Example: hadoop jar ~/Test_2.jar hdfs://localhost:9000/home/james/Test_2 hdfs://localhost:9000/home/james/output");
            System.err.println("Counter:");
            System.err.println("\t" + "LINESKIP" + "\t"
                    + "Lines which are too short");
            System.exit(-1);
        }
        DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        Date start = new Date();
        int res = ToolRunner.run(new Configuration(), new Salave(), args);
        Date end = new Date();
        // Elapsed wall-clock time in minutes.
        float time = (float) ((end.getTime() - start.getTime()) / 60000.0);
        System.out.println("start   " + formatter.format(start));
        System.out.println("end    " + formatter.format(end));
        System.out.println("time    " + String.valueOf(time) + " min");
        System.exit(res);
    }
}

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值