离线数仓DWD层
- DWD层设计要点:
- 9.1 交易域加购事务事实表
- 9.2 交易域下单事务事实表
- 9.3 交易域取消订单事务事实表
- 9.4 交易域支付成功事务事实表
- 9.5 交易域退单事务事实表
- 9.6 交易域退款成功事务事实表
- 9.7 交易域购物车周期快照事实表
- 9.8 工具域优惠券领取事务事实表
- 9.9 工具域优惠券使用(下单)事务事实表
- 9.10 工具域优惠券使用(支付)事务事实表
- 9.11 互动域收藏商品事务事实表
- 9.12 互动域评价事务事实表
- 9.13 流量域页面浏览事务事实表
- 9.14 流量域启动事务事实表
- 9.15 流量域动作事务事实表
- 9.16 流量域曝光事务事实表
- 9.17 流量域错误事务事实表
- 9.18 用户域用户注册事务事实表
- 9.19 用户域用户登录事务事实表
- 9.20 数据装载脚本
DWD层设计要点:
(1)DWD层的设计依据是维度建模理论,该层存储维度模型的事实表。
(2)DWD层的数据存储格式为orc列式存储+snappy压缩。
(3)DWD层表名的命名规范为dwd_数据域_表名_单分区增量全量标识(inc/full)
9.1 交易域加购事务事实表
1)建表语句
DROP TABLE IF EXISTS dwd_trade_cart_add_inc;
CREATE EXTERNAL TABLE dwd_trade_cart_add_inc
(
`id` STRING COMMENT '编号',
`user_id` STRING COMMENT '用户id',
`sku_id` STRING COMMENT '商品id',
`date_id` STRING COMMENT '时间id',
`create_time` STRING COMMENT '加购时间',
`source_id` STRING COMMENT '来源类型ID',
`source_type_code` STRING COMMENT '来源类型编码',
`source_type_name` STRING COMMENT '来源类型名称',
`sku_num` BIGINT COMMENT '加购物车件数'
) COMMENT '交易域加购物车事务事实表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_trade_cart_add_inc/'
TBLPROPERTIES ('orc.compress' = 'snappy');
2)分区规划
3)数据装载
(1)数据流向
(2)首日装载
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dwd_trade_cart_add_inc partition (dt)
select
id,
user_id,
sku_id,
date_format(create_time,'yyyy-MM-dd') date_id,
create_time,
source_id,
source_type,
dic.dic_name,
sku_num,
date_format(create_time, 'yyyy-MM-dd')
from
(
select
data.id,
data.user_id,
data.sku_id,
data.create_time,
data.source_id,
data.source_type,
data.sku_num
from ods_cart_info_inc
where dt = '2020-06-14'
and type = 'bootstrap-insert'
)ci
left join
(
select
dic_code,
dic_name
from ods_base_dic_full
where dt='2020-06-14'
and parent_code='24'
)dic
on ci.source_type=dic.dic_code;
(3)每日装载
insert overwrite table dwd_trade_cart_add_inc partition(dt='2020-06-15')
select
id,
user_id,
sku_id,
date_id,
create_time,
source_id,
source_type_code,
source_type_name,
sku_num
from
(
select
data.id,
data.user_id,
data.sku_id,
date_format(from_utc_timestamp(ts*1000,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts*1000,'GMT+8'),'yyyy-MM-dd HH:mm:ss') create_time,
data.source_id,
data.source_type source_type_code,
if(type='insert',data.sku_num,data.sku_num-old['sku_num']) sku_num
from ods_cart_info_inc
where dt='2020-06-15'
and (type='insert'
or(type='update' and old['sku_num'] is not null and data.sku_num>cast(old['sku_num'] as int)))
)cart
left join
(
select
dic_code,
dic_name source_type_name
from ods_base_dic_full
where dt='2020-06-15'
and parent_code='24'
)dic
on cart.source_type_code=dic.dic_code;
9.2 交易域下单事务事实表
1)建表语句
DROP TABLE IF EXISTS dwd_trade_order_detail_inc;
CREATE EXTERNAL TABLE dwd_trade_order_detail_inc
(
`id` STRING COMMENT '编号',
`order_id` STRING COMMENT '订单id',
`user_id` STRING COMMENT '用户id',
`sku_id` STRING COMMENT '商品id',
`province_id` STRING COMMENT '省份id',
`activity_id` STRING COMMENT '参与活动规则id',
`activity_rule_id` STRING COMMENT '参与活动规则id',
`coupon_id` STRING COMMENT '使用优惠券id',
`date_id` STRING COMMENT '下单日期id',
`create_time` STRING COMMENT '下单时间',
`source_id` STRING COMMENT '来源编号',
`source_type_code` STRING COMMENT '来源类型编码',
`source_type_name` STRING COMMENT '来源类型名称',
`sku_num` BIGINT COMMENT '商品数量',
`split_original_amount` DECIMAL(16, 2) COMMENT '原始价格',
`split_activity_amount` DECIMAL(16, 2) COMMENT '活动优惠分摊',
`split_coupon_amount` DECIMAL(16, 2) COMMENT '优惠券优惠分摊',
`split_total_amount` DECIMAL(16, 2) COMMENT '最终价格分摊'
) COMMENT '交易域下单明细事务事实表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_trade_order_detail_inc/'
TBLPROPERTIES ('orc.compress' = 'snappy');
2)数据装载
(1)首日装载
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dwd_trade_order_detail_inc partition (dt)
select
od.id,
order_id,
user_id,
sku_id,
province_id,
activity_id,
activity_rule_id,
coupon_id,
date_format(create_time, 'yyyy-MM-dd') date_id,
create_time,
source_id,
source_type,
dic_name,
sku_num,
split_original_amount,
split_activity_amount,
split_coupon_amount,
split_total_amount,
date_format(create_time,'yyyy-MM-dd')
from
(
select
data.id,
data.order_id,
data.sku_id,
data.create_time,
data.source_id,
data.source_type,
data.sku_num,
data.sku_num * data.order_price split_original_amount,
data.split_total_amount,
data.split_activity_amount,
data.split_coupon_amount
from ods_order_detail_inc
where dt = '2020-06-14'
and type = 'bootstrap-insert'
) od
left join
(
select
data.id,
data.user_id,
data.province_id
from ods_order_info_inc
where dt = '2020-06-14'
and type = 'bootstrap-insert'
) oi
on od.order_id = oi.id
left join
(
select
data.order_detail_id,
data.activity_id,
data.activity_rule_id
from ods_order_detail_activity_inc
where dt = '2020-06-14'
and type = 'bootstrap-insert'
) act
on od.id = act.order_detail_id
left join
(
select
data.order_detail_id,
data.coupon_id
from ods_order_detail_coupon_inc
where dt = '2020-06-14'
and type = 'bootstrap-insert'
) cou
on od.id = cou.order_detail_id
left join
(
select
dic_code,
dic_name
from ods_base_dic_full
where dt='2020-06-14'
and parent_code='24'
)dic
on od.source_type=dic.dic_code;
(2)每日装载
insert overwrite table dwd_trade_order_detail_inc partition (dt='2020-06-15')
select
od.id,
order_id,
user_id,
sku_id,
province_id,
activity_id,
activity_rule_id,
coupon_id,
date_id,
create_time,
source_id,
source_type,
dic_name,
sku_num,
split_original_amount,
split_activity_amount,
split_coupon_amount,
split_total_amount
from
(
select
data.id,
data.order_id,
data.sku_id,
date_format(data.create_time, 'yyyy-MM-dd') date_id,
data.create_time,
data.source_id,
data.source_type,
data.sku_num,
data.sku_num * data.order_price split_original_amount,
data.split_total_amount,
data.split_activity_amount,
data.split_coupon_amount
from ods_order_detail_inc
where dt = '2020-06-15'
and type = 'insert'
) od
left join
(
select
data.id,
data.user_id,
data.province_id
from ods_order_info_inc
where dt = '2020-06-15'
and type = 'insert'
) oi
on od.order_id = oi.id
left join
(
select
data.order_detail_id,
data.activity_id,
data.activity_rule_id
from ods_order_detail_activity_inc
where dt = '2020-06-15'
and type = 'insert'
) act
on od.id = act.order_detail_id
left join
(
select
data.order_detail_id,
data.coupon_id
from ods_order_detail_coupon_inc
where dt = '2020-06-15'
and type = 'insert'
) cou
on od.id = cou.order_detail_id
left join
(
select
dic_code,
dic_name
from ods_base_dic_full
where dt='2020-06-15'
and parent_code='24'
)dic
on od.source_type=dic.dic_code;
9.3 交易域取消订单事务事实表
1)建表语句
DROP TABLE IF EXISTS dwd_trade_cancel_detail_inc;
CREATE EXTERNAL TABLE dwd_trade_cancel_detail_inc
(
`id` STRING COMMENT '编号',
`order_id` STRING COMMENT '订单id',
`user_id` STRING COMMENT '用户id',
`sku_id` STRING COMMENT '商品id',
`province_id` STRING COMMENT '省份id',
`activity_id` STRING COMMENT '参与活动规则id',
`activity_rule_id` STRING COMMENT '参与活动规则id',
`coupon_id` STRING COMMENT '使用优惠券id',
`date_id` STRING COMMENT '取消订单日期id',
`cancel_time` STRING COMMENT '取消订单时间',
`source_id` STRING COMMENT '来源编号',
`source_type_code` STRING COMMENT '来源类型编码',
`source_type_name` STRING COMMENT '来源类型名称',
`sku_num` BIGINT COMMENT '商品数量',
`split_original_amount` DECIMAL(16, 2) COMMENT '原始价格',
`split_activity_amount` DECIMAL(16, 2) COMMENT '活动优惠分摊',
`split_coupon_amount` DECIMAL(16, 2) COMMENT '优惠券优惠分摊',
`split_total_amount` DECIMAL(16, 2) COMMENT '最终价格分摊'
) COMMENT '交易域取消订单明细事务事实表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_trade_cancel_detail_inc/'
TBLPROPERTIES ('orc.compress' = 'snappy');
2)数据装载
(1)首日装载
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dwd_trade_cancel_detail_inc partition (dt)
select
od.id,
order_id,
user_id,
sku_id,
province_id,
activity_id,
activity_rule_id,
coupon_id,
date_format(canel_time,'yyyy-MM-dd') date_id,
canel_time,
source_id,
source_type,
dic_name,
sku_num,
split_original_amount,
split_activity_amount,
split_coupon_amount,
split_total_amount,
date_format(canel_time,'yyyy-MM-dd')
from
(
select
data.id,
data.order_id,
data.sku_id,
data.source_id,
data.source_type,
data.sku_num,
data.sku_num * data.order_price split_original_amount,
data.split_total_amount,
data.split_activity_amount,
data.split_coupon_amount
from ods_order_detail_inc
where dt = '2020-06-14'
and type = 'bootstrap-insert'
) od
join
(
select
data.id,
data.user_id,
data.province_id,
data.operate_time canel_time
from ods_order_info_inc
where dt = '2020-06-14'
and type = 'bootstrap-insert'
and data.order_status='1003'
) oi
on od.order_id = oi.id
left join
(
select
data.order_detail_id,
data.activity_id,
data.activity_rule_id
from ods_order_detail_activity_inc
where dt = '2020-06-14'
and type = 'bootstrap-insert'
) act
on od.id = act.order_detail_id
left join
(
select
data.order_detail_id,
data.coupon_id
from ods_order_detail_coupon_inc
where dt = '2020-06-14'
and type = 'bootstrap-insert'
) cou
on od.id = cou.order_detail_id
left join
(
select
dic_code,
dic_name
from ods_base_dic_full
where dt='2020-06-14'
and parent_code='24'
)dic
on od.source_type=dic.dic_code;
(2)每日装载
insert overwrite table dwd_trade_cancel_detail_inc partition (dt='2020-06-15')
select
od.id,
order_id,
user_id,
sku_id,
province_id,
activity_id,
activity_rule_id,
coupon_id,
date_format(canel_time,'yyyy-MM-dd') date_id,
canel_time,
source_id,
source_type,
dic_name,
sku_num,
split_original_amount,
split_activity_amount,
split_coupon_amount,
split_total_amount
from
(
select
data.id,
data.order_id,
data.sku_id,
data.source_id,
data.source_type,
data.sku_num,
data.sku_num * data.order_price split_original_amount,
data.split_total_amount,
data.split_activity_amount,
data.split_coupon_amount
from ods_order_detail_inc
where (dt='2020-06-15' or dt=date_add('2020-06-15',-1))
and (type = 'insert' or type= 'bootstrap-insert')
) od
join
(
select
data.id,
data.user_id,
data.province_id,
data.operate_time canel_time
from ods_order_info_inc
where dt = '2020-06-15'
and type = 'update'
and data.order_status='1003'
and array_contains(map_keys(old),'order_status')
) oi
on order_id = oi.id
left join
(
select
data.order_detail_id,
data.activity_id,
data.activity_rule_id
from ods_order_detail_activity_inc
where (dt='2020-06-15' or dt=date_add('2020-06-15',-1))
and (type = 'insert' or type= 'bootstrap-insert')
) act
on od.id = act.order_detail_id
left join
(
select
data.order_detail_id,
data.coupon_id
from ods_order_detail_coupon_inc
where (dt='2020-06-15' or dt=date_add('2020-06-15',-1))
and (type = 'insert' or type= 'bootstrap-insert')
) cou
on od.id = cou.order_detail_id
left join
(
select
dic_code,
dic_name
from ods_base_dic_full
where dt='2020-06-15'
and parent_code='24'
)dic
on od.source_type=dic.dic_code;
9.4 交易域支付成功事务事实表
1)建表语句
DROP TABLE IF EXISTS dwd_trade_pay_detail_suc_inc;
CREATE EXTERNAL TABLE dwd_trade_pay_detail_suc_inc
(
`id` STRING COMMENT '编号',
`order_id` STRING COMMENT '订单id',
`user_id` STRING COMMENT '用户id',
`sku_id` STRING COMMENT '商品id',
`province_id` STRING COMMENT '省份id',
`activity_id` STRING COMMENT '参与活动规则id',
`activity_rule_id` STRING COMMENT '参与活动规则id',
`coupon_id` STRING COMMENT '使用优惠券id',
`payment_type_code` STRING COMMENT '支付类型编码',
`payment_type_name` STRING COMMENT '支付类型名称',
`date_id` STRING COMMENT '支付日期id',
`callback_time` STRING COMMENT '支付成功时间',
`source_id` STRING COMMENT '来源编号',
`source_type_code` STRING COMMENT '来源类型编码',
`source_type_name` STRING COMMENT '来源类型名称',
`sku_num` BIGINT COMMENT '商品数量',
`split_original_amount` DECIMAL(16, 2) COMMENT '应支付原始金额',
`split_activity_amount` DECIMAL(16, 2) COMMENT '支付活动优惠分摊',
`split_coupon_amount` DECIMAL(16, 2) COMMENT '支付优惠券优惠分摊',
`split_payment_amount` DECIMAL(16, 2) COMMENT '支付金额'
) COMMENT '交易域成功支付事务事实表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
LOCATION '/warehouse/gmall/dwd/dwd_trade_pay_detail_suc_inc/'
TBLPROPERTIES ('orc.compress' = 'snappy');
2)数据装载
(1)首日装载
insert overwrite table dwd_trade_pay_detail_suc_inc partition (dt)
select
od.id,
od.order_id,
user_id,
sku_id,
province_id,
activity_id,
activity_rule_id,
coupon_id,
payment_type,
pay_dic.dic_name,
date_format(callback_time,'yyyy-MM-dd') date_id,
callback_time,
source_id,
source_type,
src_dic.dic_name,
sku_num,
split_original_amount,
split_activity_amount,
split_coupon_amount,
split_total_amount,
date_format(callback_time,'yyyy-MM-dd')
from
(
select
data.id,
data.order_id,
data.sku_id,
data.source_id,
data.source_type,
data.sku_num,
data.sku_num * data.order_price split_original_amount,
data.split_total_amount,
data.split_activity_amount,
data.split_coupon_amount
from ods_order_detail_inc
where dt = '2020-06-14'
and type = 'bootstrap-insert'
) od
join
(
select
data.user_id,
data.order_id,
data.payment_type,
data.callback_time
from ods_payment_info_inc
where dt='2020-06-14'
and type='bootstrap-insert'
and data.payment_status='1602'
) pi
on od.order_id=pi.order_id
left join
(
select
data.id,
data.province_id
from ods_order_info_inc
where dt = '2020-06-14'
and type = 'bootstrap-insert'
) oi
on od.order_id = oi.id
left join
(
select
data.order_detail_id,
data.activity_id,
data.activity_rule_id
from ods_order_detail_activity_inc
where dt = '2020-06-14'
and type = 'bootstrap-insert'
) act
on od.id = act.order_detail_id
left join
(
select
data.order_detail_id,
data.coupon_id
from ods_order_detail_coupon_inc
where dt = '2020-06-14'
and type = 'bootstrap-insert'
) cou
on od.id = cou.order_detail_id
left join
(
select
dic_code,
dic_name
from ods_base_dic_full
where dt='2020-06-14'
and parent_code='11'
) pay_dic
on pi.payment_type=pay_dic.dic_code
left join
(
select
dic_code,
dic_name
from ods_base_dic_full
where dt='2020-06-14'
and parent_code='24'
)src_dic
on od.source_type=src_dic.dic_code;
(2)每日装载
insert overwrite table dwd_trade_pay_detail_suc_inc partition (dt='2020-06-15')
select
od.id,
od.order_id,
user_id,
sku_id,
province_id,
activity_id,
activity_rule_id,
coupon_id,
payment_type,
pay_dic.dic_name,
date_format(callback_time,'yyyy-MM-dd') date_id,
callback_time,
source_id,
source_type,
src_dic.dic_name,
sku_num,
split_original_amount,
split_activity_amount,
split_coupon_amount,
split_total_amount
from
(
select
data.id,
data.order_id,
data.sku_id,
data.source_id,
data.source_type,
data.sku_num,
data.sku_num * data.order_price split_original_amount,
data.split_total_amount,
data.split_activity_amount,
data.split_coupon_amount
from ods_order_detail_inc
where (dt = '2020-06-15' or dt = date_add('2020-06-15',-1))
and (type = 'insert' or type = 'bootstrap-insert')
) od
join
(
select
data.user_id,
data.order_id,
data.payment_type,
data.callback_time
from ods_payment_info_inc
where dt='2020-06-15'
and type='update'
and array_contains(map_keys(old),'payment_status')
and data.payment_status='1602'
) pi
on od.order_id=pi.order_id
left join
(
select
data.id,
data.province_id
from ods_order_info_inc
where (dt = '2020-06-15' or dt = date_add('2020-06-15',-1))
and (type = 'insert' or type = 'bootstrap-insert')
) oi
on od.order_id = oi.id
left join
(
select
data.order_detail_id,
data.activity_id,
data.activity_rule_id
from ods_order_detail_activity_inc
where (dt = '2020-06-15' or dt = date_add('2020-06-15',-1))
and (type = 'insert' or type = 'bootstrap-insert')
) act
on od.id = act.order_detail_id
left join
(
select
data.order_detail_id,
data.coupon_id
from ods_order_detail_coupon_inc
where (dt = '2020-06-15' or dt = date_add('2020-06-15',-1))
and (type = 'insert' or type = 'bootstrap-insert')
) cou
on od.id = cou.order_detail_id
left join
(
select
dic_code,
dic_name
from ods_base_dic_full
where dt='2020-06-15'
and parent_code='11'
) pay_dic
on pi.payment_type=pay_dic.dic_code
left join
(
select
dic_code,
dic_name
from ods_base_dic_full
where dt='2020-06-15'
and parent_code='24'
)src_dic
on od.source_type=src_dic.dic_code;
9.5 交易域退单事务事实表
1)建表语句
DROP TABLE IF EXISTS dwd_trade_order_refund_inc;
CREATE EXTERNAL TABLE dwd_trade_order_refund_inc
(
`id` STRING COMMENT '编号',
`user_id` STRING COMMENT '用户ID',
`order_id` STRING COMMENT '订单ID',
`sku_id` STRING COMMENT '商品ID',
`province_id` STRING COMMENT '地区ID',
`date_id` STRING COMMENT '日期ID',
`create_time` STRING COMMENT '退单时间',
`refund_type_code` STRING COMMENT '退单类型编码',
`refund_type_name` STRING COMMENT '退单类型名称',
`refund_reason_type_code` STRING COMMENT '退单原因类型编码',
`refund_reason_type_name` STRING COMMENT '退单原因类型名称',
`refund_reason_txt` STRING COMMENT '退单原因描述',
`refund_num` BIGINT COMMENT '退单件数',
`refund_amount` DECIMAL(