ADS层数据导出
前言
前面完成了ADS层所有表的设计和实现,下面就要将这些数据导出到MySQL数据库中,方便可视化的使用。
一、报表数据导出
1.MySQL建库建表
我们要将HDFS里的统计数据导出到MySQL中,首先要建数据库,再建相应的表。
1.1 创建数据库
CREATE DATABASE IF NOT EXISTS gmall_report DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
1.2 创建表
1)各渠道流量统计
Hive中的表:
DROP TABLE IF EXISTS ads_traffic_stats_by_channel;
CREATE EXTERNAL TABLE ads_traffic_stats_by_channel
(
`dt` STRING COMMENT '统计日期',
`recent_days` BIGINT COMMENT '最近天数,1:最近1天,7:最近7天,30:最近30天',
`channel` STRING COMMENT '渠道',
`uv_count` BIGINT COMMENT '访客人数',
`avg_duration_sec` BIGINT COMMENT '会话平均停留时长,单位为秒',
`avg_page_count` BIGINT COMMENT '会话平均浏览页面数',
`sv_count` BIGINT COMMENT '会话数',
`bounce_rate` DECIMAL(16, 2) COMMENT '跳出率'
) COMMENT '各渠道流量统计'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_traffic_stats_by_channel/';
MySQL中的表:
DROP TABLE IF EXISTS `ads_traffic_stats_by_channel`;
CREATE TABLE `ads_traffic_stats_by_channel` (
`dt` date NOT NULL COMMENT '统计日期',
`recent_days` bigint(20) NOT NULL COMMENT '最近天数,1:最近1天,7:最近7天,30:最近30天',
`channel` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '渠道',
`uv_count` bigint(20) NULL DEFAULT NULL COMMENT '访客人数',
`avg_duration_sec` bigint(20) NULL DEFAULT NULL COMMENT '会话平均停留时长,单位为秒',
`avg_page_count` bigint(20) NULL DEFAULT NULL COMMENT '会话平均浏览页面数',
`sv_count` bigint(20) NULL DEFAULT NULL COMMENT '会话数',
`bounce_rate` decimal(16, 2) NULL DEFAULT NULL COMMENT '跳出率',
PRIMARY KEY (`dt`, `recent_days`, `channel`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci COMMENT = '各渠道流量统计' ROW_FORMAT = Dynamic;
2)路径分析
Hive中的表:
DROP TABLE IF EXISTS ads_page_path;
CREATE EXTERNAL TABLE ads_page_path
(
`dt` STRING COMMENT '统计日期',
`recent_days` BIGINT COMMENT '最近天数,1:最近1天,7:最近7天,30:最近30天',
`source` STRING COMMENT '跳转起始页面ID',
`target` STRING COMMENT '跳转终到页面ID',
`path_count` BIGINT COMMENT '跳转次数'
) COMMENT '页面浏览路径分析'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_page_path/';
MySQL建表语句:
DROP TABLE IF EXISTS `ads_page_path`;
CREATE TABLE `ads_page_path` (
`dt` date NOT NULL COMMENT '统计日期',
`recent_days` bigint(20) NOT NULL COMMENT '最近天数,1:最近1天,7:最近7天,30:最近30天',
`source` varchar(64) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '跳转起始页面ID',
`target` varchar(64) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '跳转终到页面ID',
`path_count` bigint(20) NULL DEFAULT NULL COMMENT '跳转次数',
PRIMARY KEY (`dt`, `recent_days`, `source`, `target`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci COMMENT = '页面浏览路径分析' ROW_FORMAT = Dynamic;
3)用户变动统计
Hive中的表:
DROP TABLE IF EXISTS ads_user_change;
CREATE EXTERNAL TABLE ads_user_change
(
`dt` STRING COMMENT '统计日期',
`user_churn_count` BIGINT COMMENT '流失用户数',
`user_back_count` BIGINT COMMENT '回流用户数'
) COMMENT '用户变动统计'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_user_change/';
MySQL建表语句:
DROP TABLE IF EXISTS `ads_user_change`;
CREATE TABLE `ads_user_change` (
`dt` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '统计日期',
`user_churn_count` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '流失用户数',
`user_back_count` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '回流用户数',
PRIMARY KEY (`dt`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci COMMENT = '用户变动统计' ROW_FORMAT = Dynamic;
4)用户留存率
Hive建表语句:
DROP TABLE IF EXISTS ads_user_retention;
CREATE EXTERNAL TABLE ads_user_retention
(
`dt` STRING COMMENT '统计日期',
`create_date` STRING COMMENT '用户新增日期',
`retention_day` INT COMMENT '截至当前日期留存天数',
`retention_count` BIGINT COMMENT '留存用户数量',
`new_user_count` BIGINT COMMENT '新增用户数量',
`retention_rate` DECIMAL(16, 2) COMMENT '留存率'
) COMMENT '用户留存率'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_user_retention/';
MySQL建表语句:
DROP TABLE IF EXISTS `ads_user_retention`;
CREATE TABLE `ads_user_retention` (
`dt` date NOT NULL COMMENT '统计日期',
`create_date` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '用户新增日期',
`retention_day` int(20) NOT NULL COMMENT '截至当前日期留存天数',
`retention_count` bigint(20) NULL DEFAULT NULL COMMENT '留存用户数量',
`new_user_count` bigint(20) NULL DEFAULT NULL COMMENT '新增用户数量',
`retention_rate` decimal(16, 2) NULL DEFAULT NULL COMMENT '留存率',
PRIMARY KEY (`dt`, `create_date`, `retention_day`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci COMMENT = '留存率' ROW_FORMAT = Dynamic;
5)用户新增活跃统计
Hive建表语句:
DROP TABLE IF EXISTS ads_user_stats;
CREATE EXTERNAL TABLE ads_user_stats
(
`dt` STRING COMMENT '统计日期',
`recent_days` BIGINT COMMENT '最近n日,1:最近1日,7:最近7日,30:最近30日',
`new_user_count` BIGINT COMMENT '新增用户数',
`active_user_count` BIGINT COMMENT '活跃用户数'
) COMMENT '用户新增活跃统计'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/warehouse/gmall/ads/ads_user_stats/';
MySQL建表语句:
DROP TABLE IF EXISTS `ads_user_stats`;
CREATE TABLE `ads_user_stats` (
`dt` date NOT NULL COMMENT '统计日期',
`recent_days` bigint