Problem with the previous post's approach: every MR job re-reads the full set of raw logs it needs to process; once the data volume gets large, this falls apart.
ETL: the full raw data is not convenient to compute on directly. It is better to run one extra processing pass first, and only then do the per-dimension statistics you actually want:
Parse out the fields you need: IP ==> country/province/city information
Drop the fields you don't need: there are far too many of them
Fields kept after ETL: ip / time / url / page_id / country / province / city
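ETLApp below writes each cleaned record as a single tab-separated line in the following fixed field order, which is exactly what LogParser.parseV2 later splits on "\t" and reads back by position:

ip \t country \t province \t city \t url \t time \t pageId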
1. ETLApp
import com.imooc.bigdata.hadoop.mr.project.utils.ContentUtils;
import com.imooc.bigdata.hadoop.mr.project.utils.LogParser;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.Map;

public class ETLApp {

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();

        FileSystem fileSystem = FileSystem.get(configuration);
        Path outputPath = new Path(args[1]);
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        Job job = Job.getInstance(configuration);
        job.setJarByClass(ETLApp.class);
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }

    static class MyMapper extends Mapper<LongWritable, Text, NullWritable, Text> {

        private LogParser logParser;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            logParser = new LogParser();
        }

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String log = value.toString();
            Map<String, String> info = logParser.parse(log);

            String ip = info.get("ip");
            String country = info.get("country");
            String province = info.get("province");
            String city = info.get("city");
            String url = info.get("url");
            String time = info.get("time");
            String pageId = ContentUtils.getPageId(url);

            StringBuilder builder = new StringBuilder();
            builder.append(ip).append("\t");
            builder.append(country).append("\t");
            builder.append(province).append("\t");
            builder.append(city).append("\t");
            builder.append(url).append("\t");
            builder.append(time).append("\t");
            builder.append(pageId);

            context.write(NullWritable.get(), new Text(builder.toString()));
        }
    }
}
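Note that ETLApp registers no reducer, so Hadoop still runs a default identity reduce; adding job.setNumReduceTasks(0) in the driver would make it an explicit map-only job and skip that unnecessary shuffle.

ContentUtils.getPageId is not listed in this post. A minimal sketch of what it could look like, assuming the page id is carried in the URL as a topicId=<digits> query parameter; the regex is an assumption and must be adapted to the real URL format in your logs:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ContentUtils {

    // Assumption: the page id appears in the URL as "topicId=<digits>"; adjust to your log format.
    private static final Pattern PAGE_ID_PATTERN = Pattern.compile("topicId=(\\d+)");

    public static String getPageId(String url) {
        String pageId = "";
        if (url == null) {
            return pageId;
        }
        Matcher matcher = PAGE_ID_PATTERN.matcher(url);
        if (matcher.find()) {
            pageId = matcher.group(1);
        }
        return pageId;
    }
}

Next, parseV2 is added to LogParser so the stat jobs can read the ETL output back: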
import org.apache.commons.lang.StringUtils;

import java.util.HashMap;
import java.util.Map;

public class LogParser {

    // parseV2 parses the ETL output: one tab-separated record in the order
    // ip, country, province, city, url, time, pageId
    public Map<String, String> parseV2(String log) {
        Map<String, String> info = new HashMap<>();
        if (StringUtils.isNotBlank(log)) {
            String[] splits = log.split("\t");

            String ip = splits[0];
            String country = splits[1];
            String province = splits[2];
            String city = splits[3];
            info.put("ip", ip);
            info.put("country", country);
            info.put("province", province);
            info.put("city", city);

            String url = splits[4];
            info.put("url", url);

            String time = splits[5];
            info.put("time", time);

            String pageId = splits[6];
            info.put("pageId", pageId);
        }
        return info;
    }
}
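ETLApp calls logParser.parse(log), which parses the original raw access log; only parseV2 (for the ETL output) is listed above. The piece the ETL really adds is the IP ==> region lookup. A rough sketch of parse(), to sit alongside parseV2 inside LogParser, assuming the raw log is also tab-separated with the IP in the 14th field (index 13) and that IPParser exposes an analyse(ip) method returning a RegionInfo with getCountry/getProvince/getCity; those positions and method names are assumptions that must match your actual project code:

public Map<String, String> parse(String log) {
    Map<String, String> info = new HashMap<>();
    if (StringUtils.isNotBlank(log)) {
        String[] splits = log.split("\t");

        String ip = splits[13];  // assumption: the IP is the 14th tab-separated field of the raw log
        info.put("ip", ip);

        // Assumed IPParser API: resolve the IP to country/province/city.
        IPParser.RegionInfo regionInfo = IPParser.getInstance().analyse(ip);
        if (regionInfo != null) {
            info.put("country", regionInfo.getCountry());
            info.put("province", regionInfo.getProvince());
            info.put("city", regionInfo.getCity());
        }

        // url and time are copied through from their positions in the raw log
        // (format-specific, so not shown here).
    }
    return info;
}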
2. PageStatV2App
import com.imooc.bigdata.hadoop.mr.project.utils.ContentUtils;
import com.imooc.bigdata.hadoop.mr.project.utils.LogParser;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.Map;

/**
 * Page-view count per page (pageId), computed from the ETL output.
 */
public class PageStatV2App {

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();

        FileSystem fileSystem = FileSystem.get(configuration);
        Path outputPath = new Path(args[1]);
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        Job job = Job.getInstance(configuration);
        job.setJarByClass(PageStatV2App.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }

    static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        private LongWritable ONE = new LongWritable(1);
        private LogParser logParser;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            logParser = new LogParser();
        }

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String log = value.toString();
            Map<String, String> info = logParser.parseV2(log);

            String url = info.get("url");
            if (StringUtils.isNotBlank(url)) {
                String pageId = ContentUtils.getPageId(url); // use the utility class to extract the pageId from the url
                context.write(new Text(pageId), ONE);
            }
        }
    }

    static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
            long count = 0;
            for (LongWritable value : values) {
                count++;
            }
            context.write(key, new LongWritable(count));
        }
    }
}
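The reducer above counts records with count++, which is correct only because every incoming value is exactly 1 and no combiner is registered. If you want to cut shuffle traffic by reusing the reducer as a combiner, the reduce logic has to sum the values instead of counting them, roughly like this (the same applies to the reducers in the province and PV jobs below):

static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        long count = 0;
        for (LongWritable value : values) {
            count += value.get();   // sum partial counts, so a combiner can pre-aggregate safely
        }
        context.write(key, new LongWritable(count));
    }
}

With that change, job.setCombinerClass(MyReducer.class) can be added next to job.setReducerClass(MyReducer.class) in the driver.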
3. ProvinceStatV2App
import com.imooc.bigdata.hadoop.mr.project.utils.LogParser;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.Map;

/**
 * Page-view count per province, computed from the ETL output
 * (the IP was already resolved to a province during ETL, so no IPParser is needed here).
 */
public class ProvinceStatV2App {

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();

        FileSystem fileSystem = FileSystem.get(configuration);
        Path outputPath = new Path(args[1]);
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        Job job = Job.getInstance(configuration);
        job.setJarByClass(ProvinceStatV2App.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }

    static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        private LongWritable ONE = new LongWritable(1);
        private LogParser logParser;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            logParser = new LogParser();
        }

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String log = value.toString();
            Map<String, String> info = logParser.parseV2(log);
            context.write(new Text(info.get("province")), ONE);
        }
    }

    static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
            long count = 0;
            for (LongWritable value : values) {
                count++;
            }
            context.write(key, new LongWritable(count));
        }
    }
}
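One thing to watch in this mapper: on a blank or malformed line, info.get("province") comes back null and new Text(null) throws a NullPointerException. A guarded version of map, mirroring the StringUtils.isNotBlank check that PageStatV2App applies to the url (requires import org.apache.commons.lang.StringUtils):

@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    String log = value.toString();
    Map<String, String> info = logParser.parseV2(log);

    String province = info.get("province");
    if (StringUtils.isNotBlank(province)) {
        context.write(new Text(province), ONE);
    }
    // lines without a province (blank or bad records) are simply skipped
}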
4. PVStatV2App
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class PVStatV2App {

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();

        FileSystem fileSystem = FileSystem.get(configuration);
        Path outputPath = new Path(args[1]);
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        Job job = Job.getInstance(configuration);
        job.setJarByClass(PVStatV2App.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(LongWritable.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }

    static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        private Text KEY = new Text("key");
        private LongWritable ONE = new LongWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            context.write(KEY, ONE);
        }
    }

    static class MyReducer extends Reducer<Text, LongWritable, NullWritable, LongWritable> {
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
            long count = 0;
            for (LongWritable value : values) {
                count++;
            }
            context.write(NullWritable.get(), new LongWritable(count));
        }
    }
}
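To tie the section together: the fix for the problem stated at the top is that ETLApp reads the raw logs exactly once, and every stat job then reads the much smaller ETL output. The jobs are submitted one after another; a minimal sketch of that run order, assuming each class is invoked with its own (input, output) argument pair as in the main() methods above and that this class sits in the same package as the four apps (the paths are placeholders):

public class PipelineRunner {
    public static void main(String[] args) throws Exception {
        // Placeholder paths: replace with the real HDFS locations.
        String rawInput  = "input/raw";   // original access logs
        String etlOutput = "output/etl";  // cleaned, tab-separated records

        // Step 1: run the ETL once over the full raw logs.
        ETLApp.main(new String[]{rawInput, etlOutput});

        // Step 2: every stat job reads the ETL output instead of the raw logs.
        PVStatV2App.main(new String[]{etlOutput, "output/pv"});
        PageStatV2App.main(new String[]{etlOutput, "output/page"});
        ProvinceStatV2App.main(new String[]{etlOutput, "output/province"});
    }
}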