5.5 离线数仓—DWS层数据装载脚本

本文档详细介绍了离线数仓DWS层的数据装载脚本,包括DWS最近1日汇总表的首日和每日装载脚本,DWS最近n日汇总表的统一装载脚本,以及DWS历史至今td汇总表的首日和每日数据装载过程。


前言

前面完成了DWS层所有表的设计和开发,为了方便使用,准备一下数据装载的脚本。

一、DWS最近1日汇总表

1.首日装载脚本

脚本名称:dwd_to_dws_1d_init.sh
脚本内容:

#!/bin/bash
APP=gmall

if [ -n "$2" ] ;then
   do_date=$2
else 
   echo "请传入日期参数"
   exit
fi

dws_trade_province_order_1d="
insert overwrite table ${APP}.dws_trade_province_order_1d partition(dt)
select
    province_id,
    province_name,
    area_code,
    iso_code,
    iso_3166_2,
    order_count_1d,
    order_original_amount_1d,
    activity_reduce_amount_1d,
    coupon_reduce_amount_1d,
    order_total_amount_1d,
    dt
from
(
    select
        province_id,
        count(distinct(order_id)) order_count_1d,
        sum(split_original_amount) order_original_amount_1d,
        sum(nvl(split_activity_amount,0)) activity_reduce_amount_1d,
        sum(nvl(split_coupon_amount,0)) coupon_reduce_amount_1d,
        sum(split_total_amount) order_total_amount_1d,
        dt
    from ${APP}.dwd_trade_order_detail_inc
    group by province_id,dt
)o
left join
(
    select
        id,
        province_name,
        area_code,
        iso_code,
        iso_3166_2
    from ${APP}.dim_province_full
    where dt='$do_date'
)p
on o.province_id=p.id;
"
dws_trade_user_cart_add_1d="
insert overwrite table ${APP}.dws_trade_user_cart_add_1d partition(dt)
select
    user_id,
    count(*),
    sum(sku_num),
    dt
from ${APP}.dwd_trade_cart_add_inc
group by user_id,dt;
"
dws_trade_user_order_1d="
insert overwrite table ${APP}.dws_trade_user_order_1d partition(dt)
select
    user_id,
    count(distinct(order_id)),
    sum(sku_num),
    sum(split_original_amount),
    sum(nvl(split_activity_amount,0)),
    sum(nvl(split_coupon_amount,0)),
    sum(split_total_amount),
    dt
from ${APP}.dwd_trade_order_detail_inc
group by user_id,dt;
"
dws_trade_user_order_refund_1d="
insert overwrite table ${APP}.dws_trade_user_order_refund_1d partition(dt)
select
    user_id,
    count(*) order_refund_count,
    sum(refund_num) order_refund_num,
    sum(refund_amount) order_refund_amount,
    dt
from ${APP}.dwd_trade_order_refund_inc
group by user_id,dt;
"
dws_trade_user_payment_1d="
insert overwrite table ${APP}.dws_trade_user_payment_1d partition(dt)
select
    user_id,
    count(distinct(order_id)),
    sum(sku_num),
    sum(split_payment_amount),
    dt
from ${APP}.dwd_trade_pay_detail_suc_inc
group by user_id,dt;
"
dws_trade_user_sku_order_1d="
insert overwrite table ${APP}.dws_trade_user_sku_order_1d partition(dt)
select
    user_id,
    id,
    sku_name,
    category1_id,
    category1_name,
    category2_id,
    category2_name,
    category3_id,
    category3_name,
    tm_id,
    tm_name,
    order_count_1d,
    order_num_1d,
    order_original_amount_1d,
    activity_reduce_amount_1d,
    coupon_reduce_amount_1d,
    order_total_amount_1d,
    dt
from
(
    select
        dt,
        user_id,
        sku_id,
        count(*) order_count_1d,
        sum(sku_num) order_num_1d,
        sum(split_original_amount) order_original_amount_1d,
        sum(nvl(split_activity_amount,0.0)) activity_reduce_amount_1d,
        sum(nvl(split_coupon_amount,0.0)) coupon_reduce_amount_1d,
        sum(split_total_amount) order_total_amount_1d
    from ${APP}.dwd_trade_order_detail_inc
    group by dt,user_id,sku_id
)od
left join
(
    select
        id,
        sku_name,
        category1_id,
        category1_name,
        category2_id,
        category2_name,
        category3_id,
        category3_name,
        tm_id,
        tm_name
    from ${APP}.dim_sku_full
    where dt='$do_date'
)sku
on od.sku_id=sku.id;
"
dws_trade_user_sku_order_refund_1d="
insert overwrite table ${APP}.dws_trade_user_sku_order_refund_1d partition(dt)
select
    user_id,
    sku_id,
    sku_name,
    category1_id,
    category1_name,
    category2_id,
    category2_name,
    category3_id,
    category3_name,
    tm_id,
    tm_name,
    order_refund_count,
    order_refund_num,
    order_refund_amount,
    dt
from
(
    select
        dt,
        user_id,
        sku_id,
        count(*) order_refund_count,
        sum(refund_num) order_refund_num,
        sum(refund_amount) order_refund_amount
    from ${APP}.dwd_trade_order_refund_inc
    group by dt,user_id,sku_id
)od
left join
(
    select
        id,
        sku_name,
        category1_id,
        category1_name,
        category2_id,
        category2_name,
        category3_id,
        category3_name,
        tm_id,
        tm_name
    from ${APP}.dim_sku_full
    where dt='$do_date'
)sku
on od.sku_id=sku.id;
"
dws_traffic_page_visitor_page_view_1d="
insert overwrite table ${APP}.dws_traffic_page_visitor_page_view_1d partition(dt='$do_date')
select
    mid_id,
    brand,
    model,
    operate_system,
    page_id,
    sum(during_time),
    count(*)
from ${APP}.dwd_traffic_page_view_inc
where dt='$do_date'
group by mid_id,brand,model,operate_system,page_id;
"
dws_traffic_session_page_view_1d="
insert overwrite table ${APP}.dws_traffic_session_page_view_1d partition(dt='$do_date')
select
    session_id,
    mid_id,
    brand,
    model,
    operate_system,
    version_code,
    channel,
    sum(during_time),
    count(*)
from ${APP}.dwd_traffic_page_view_inc
where dt='$do_date'
group by session_id,mid_id,brand,model,operate_system,version_code,channel;
"

case $1 in
    "dws_trade_province_order_1d" )
        hive -e "$dws_trade_province_order_1d"
    ;;
    "dws_trade_user_cart_add_1d" )
        hive -e "$dws_trade_user_cart_add_1d"
    ;;
    "dws_trade_user_order_1d" )
        hive -e "$dws_trade_user_order_1d"
    ;;
    "dws_trade_user_order_refund_1d" )
        hive -e "$dws_trade_user_order_refund_1d"
    ;;
    "dws_trade_user_payment_1d" )
        hive -e "$dws_trade_user_payment_1d"
    ;;
    "dws_trade_user_sku_order_1d" )
        hive -e "$dws_trade_user_sku_order_1d"
    ;;
    "dws_trade_user_sku_order_refund_1d" )
        hive -e "$dws_trade_user_sku_order_refund_1d"
    ;;
    "dws_traffic_page_visitor_page_view_1d" )
        hive -e "$dws_traffic_page_visitor_page_view_1d"
    ;;
    "dws_traffic_session_page_view_1d" )
        hive -e "$dws_traffic_session_page_view_1d"
    ;;
    "all" )
        hive -e "$dws_trade_province_order_1d$dws_trade_user_cart_add_1d$dws_trade_user_order_1d$dws_trade_user_order_refund_1d$dws_trade_user_payment_1d$dws_trade_user_sku_order_1d$dws_trade_user_sku_order_refund_1d$dws_traffic_page_visitor_page_view_1d$dws_traffic_session_page_view_1d"
    ;;
esac

2.每日装载脚本

#!/bin/bash
APP=gmall

# 如果输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n "$2" ] ;then
    do_date=$2
else 
    do_date=`date -d "-1 day" +%F`
fi

dws_trade_province_order_1d="
insert overwrite table ${APP}.dws_trade_province_order_1d partition(dt='$do_date')
select
    province_id,
    province_name,
    area_code,
    iso_code,
    iso_3166_2,
    order_count_1d,
    order_original_amount_1d,
    activity_reduce_amount_1d,
    coupon_reduce_amount_1d,
    order_total_amount_1d
from
(
    select
        province_id,
        count(distinct(order_id)) order_count_1d,
        sum(split_original_amount) order_original_amount_1d,
        sum(nvl(split_activity_amount,0)) activity_reduce_amount_1d,
        sum(nvl(split_coupon_amount,0)) coupon_reduce_amount_1d,
        sum(split_total_amount) order_total_amount_1d
    from ${APP}.dwd_trade_order_detail_inc
    where dt='$do_date'
    group by province_id
)o
left join
(
    select
        id,
        province_name,
        area_code,
        iso_code,
        iso_3166_2
    from ${APP}.dim_province_full
    where dt='$do_date'
)p
on o.province_id=p.id;
"
dws_trade_user_cart_add_1d="
insert overwrite table ${APP}.dws_trade_user_cart_add_1d partition(dt='$do_date')
select
    user_id,
    count(*),
    sum(sku_num)
from ${APP}.dwd_trade_cart_add_inc
where dt='$do_date'
group by user_id;
"
dws_trade_user_order_1d="
insert overwrite table ${APP}.dws_trade_user_order_1d partition(dt='$do_date')
select
    user_id,
    count(distinct(order_id)),
    sum(sku_num),
    sum(split_original_amount),
    sum(nvl(split_activity_amount,0)),
    sum(nvl(split_coupon_amount,0)),
    sum(split_total_amount)
from ${APP}.dwd_trade_order_detail_inc
where dt='$do_date'
group by user_id;
"
dws_trade_user_order_refund_1d="
insert overwrite table ${APP}.dws_trade_user_order_refund_1d partition(dt='$do_date')
select
    user_id,
    count(*),
    sum(refund_num),
    sum(refund_amount)
from ${APP}.dwd_trade_order_refund_inc
where dt='$do_date'
group by user_id;
"
dws_trade_user_payment_1d="
insert overwrite table ${APP}.dws_trade_user_payment_1d partition(dt='$do_date')
select
    user_id,
    count(distinct(order_id)),
    sum(sku_num),
    sum(split_payment_amount)
from ${APP}.dwd_trade_pay_detail_suc_inc
where dt='$do_date'
group by user_id;
"
dws_trade_user_sku_order_1d="
insert overwrite table ${APP}.dws_trade_user_sku_order_1d partition(dt='$do_date')
select
    user_id,
    id,
    sku_name,
    category1_id,
    category1_name,
    category2_id,
    category2_name,
    category3_id,
    category3_name,
    tm_id,
    tm_name,
    order_count,
    order_num,
    order_original_amount,
    activity_reduce_amount,
    coupon_reduce_amount,
    order_total_amount
from
(
    select
        user_id,
        sku_id,
        count(*) order_count,
        sum(sku_num) order_num,
        sum(split_original_amount) order_original_amount,
        sum(nvl(split_activity_amount,0)) activity_reduce_amount,
        sum(nvl(split_coupon_amount,0)) coupon_reduce_amount,
        sum(split_total_amount) order_total_amount
    from ${APP}.dwd_trade_order_detail_inc
    where dt='$do_date'
    group by user_id,sku_id
)od
left join
(
    select
        id,
        sku_name,
        category1_id,
        category1_name,
        category2_id,
        category2_name,
        category3_id,
        category3_name,
        tm_id,
        tm_name
    from ${APP}.dim_sku_full
    where dt='$do_date'
)sku
on od.sku_id=sku.id;
"
dws_trade_user_sku_order_refund_1d="
insert overwrite table ${APP}.dws_trade_user_sku_order_refund_1d partition(dt='$do_date')
select
    user_id,
    sku_id,
    sku_name,
    category1_id,
    category1_name,
    category2_id,
    category2_name,
    category3_id,
    category3_name,
    tm_id,
    tm_name,
    order_refund_count,
    order_refund_num,
    order_refund_amount
from
(
    select
        user_id,
        sku_id,
        count(*) order_refund_count,
        sum(refund_num) order_refund_num,
        sum(refund_amount) order_refund_amount
    from ${APP}.dwd_trade_order_refund_inc
    where dt='$do_date'
    group by user_id,sku_id
)od
left join
(
    select
        id,
        sku_name,
        category1_id,
        category1_name,
        category2_id,
        category2_name,
        category3_id,
        category3_name,
        tm_id,
        tm_name
    from ${APP}.dim_sku_full
    where dt='$do_date'
)sku
on od.sku_id=sku.id;
"
dws_traffic_page_visitor_page_view_1d="
insert overwrite table ${APP}.dws_traffic_page_visitor_page_view_1d partition(dt='$do_date')
select
    mid_id,
    brand,
    model,
    operate_system,
    page_id,
    sum(during_time),
    count(*)
from ${APP}.dwd_traffic_page_view_inc
where dt='$do_date'
group by mid_id,brand,model,operate_system,page_id;
"
dws_traffic_session_page_view_1d="
insert overwrite table ${APP}.dws_traffic_session_page_view_1d partition(dt='$do_date')
select
    session_id,
    mid_id,
    brand,
    model,
    operate_system,
    version_code,
    channel,
    sum(during_time),
    count(*)
from ${APP}.dwd_traffic_page_view_inc
where dt='$do_date'
group by session_id,mid_id,brand,model,operate_system,version_code,channel;
"

case $1 in
    "dws_trade_province_order_1d" )
        hive -e "$dws_trade_province_order_1d"
    ;;
    "dws_trade_user_cart_add_1d" )
        hive -e "$dws_trade_user_cart_add_1d"
    ;;
    "dws_trade_user_order_1d" )
        hive -e "$dws_trade_user_order_1d"
    ;;
    "dws_trade_user_order_refund_1d" )
        hive -e "$dws_trade_user_order_refund_1d"
    ;;
    "dws_trade_user_payment_1d" )
        hive -e "$dws_trade_user_payment_1d"
    ;;
    "dws_trade_user_sku_order_1d" )
        hive -e "$dws_trade_user_sku_order_1d"
    ;;
    "dws_trade_user_sku_order_refund_1d" )
        hive -e "$dws_trade_user_sku_order_refund_1d"
    ;;
    "dws_traffic_page_visitor_page_view_1d" )
        hive -e "$dws_traffic_page_visitor_page_view_1d"
    ;;
    "dws_traffic_session_page_view_1d" )
        hive -e "$dws_traffic_session_page_view_1d"
    ;;
    "all" )
        hive -e "$dws_trade_province_order_1d$dws_trade_user_cart_add_1d$dws_trade_user_order_1d$dws_trade_user_order_refund_1d$dws_trade_user_payment_1d$dws_trade_user_sku_order_1d$dws_trade_user_sku_order_refund_1d$dws_traffic_page_visitor_page_view_1d$dws_traffic_session_page_view_1d"
    ;;
esac

代码如下(示例):

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import  ssl
ssl._create_default_https_context = ssl._create_unverified_context

二、DWS最近n日汇总表

1.数据装载脚本

最近n日汇总表不需要区分首日装载和每日装载。
脚本名称:dws_1d_to_dws_nd.sh
脚本内容:

#!/bin/bash
APP=gmall

# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n "$2" ] ;then
    do_date=$2
else 
    do_date=`date -d "-1 day" +%F`
fi

dws_trade_activity_order_nd="
insert overwrite table ${APP}.dws_trade_activity_order_nd partition(dt='$do_date')
select
    act.activity_id,
    activity_name,
    activity_type_code,
    activity_type_name,
    date_format(start_time,'yyyy-MM-dd'),
    sum(split_original_amount),
    sum(split_activity_amount)
from
(
    select
        activity_id,
        activity_name,
        activity_type_code,
        activity_type_name,
        start_time
    from ${APP}.dim_activity_full
    where dt='$do_date'
    and date_format(start_time,'yyyy-MM-dd')>=date_add('$do_date',-29)
    group by activity_id, activity_name, activity_type_code, activity_type_name,start_time
)act
left join
(
    select
        activity_id,
        order_id,
        split_original_amount,
        split_activity_amount
    from ${APP}.dwd_trade_order_detail_inc
    where dt>=date_add('$do_date',-29)
    and dt<='$do_date'
    and activity_id is not null
)od
on act.activity_id=od.activity_id
group by act.activity_id,activity_name,activity_type_code,activity_type_name,start_time;
"
dws_trade_coupon_order_nd="
insert overwrite table ${APP}.dws_trade_coupon_order_nd partition(dt='$do_date')
select
    id,
    coupon_name,
    coupon_type_code,
    coupon_type_name,
    benefit_rule,
    start_date,
    sum(split_original_amount),
    sum(split_coupon_amount)
from
(
    select
        id,
        coupon_name,
        coupon_type_code,
        coupon_type_name,
        benefit_rule,
        date_format(start_time,'yyyy-MM-dd') start_date
    from ${APP}.dim_coupon_full
    where dt='$do_date'
    and date_format(start_time,'yyyy-MM-dd')>=date_add('$do_date',-29)
)cou
left join
(
    select
        coupon_id,
        order_id,
        split_original_amount,
        split_coupon_amount
    from ${APP}.dwd_trade_order_detail_inc
    where dt>=date_add('$do_date',-29)
    and dt<='$do_date'
    and coupon_id is not null
)od
on cou.id=od.coupon_id
group by id,coupon_name,coupon_type_code,coupon_type_name,benefit_rule,start_date;
"
dws_trade_province_order_nd="
insert overwrite table ${APP}.dws_trade_province_order_nd partition(dt='$do_date')
select
    province_id,
    province_name,
    area_code,
    iso_code,
    iso_3166_2,
    sum(if(dt>=date_add('$do_date',-6),order_count_1d,0)),
    sum(if(dt>=date_add('$do_date',-6),order_original_amount_1d,0)),
    sum(if(dt>=date_add('$do_date',-6),activity_reduce_amount_1d,0)),
    sum(if(dt>=date_add('$do_date',-6),coupon_reduce_amount_1d,0)),
    sum(if(dt>=date_add('$do_date',-6),order_total_amount_1d,0)),
    sum(order_count_1d),
    sum(order_original_amount_1d),
    sum(activity_reduce_amount_1d),
    sum(coupon_reduce_amount_1d),
    sum(order_total_amount_1d)
from ${APP}.dws_trade_province_order_1d
where dt>=date_add('$do_date',-29)
and dt<='$do_date'
group by province_id,province_name,area_code,iso_code,iso_3166_2;
"
dws_trade_user_cart_add_nd="
insert overwrite table ${APP}.dws_trade_user_cart_add_nd partition(dt='$do_date')
select
    user_id,
    sum(if(dt>=date_add('$do_date',-6),cart_add_count_1d,0)),
    sum(if(dt>=date_add('$do_date',-6),cart_add_num_1d,0)),
    sum(cart_add_count_1d),
    sum(cart_add_num_1d)
from ${APP}.dws_trade_user_cart_add_1d
where dt>=date_add('$do_date',-29)
and dt<='$do_date'
group by user_id;
"
dws_trade_user_order_nd="
insert overwrite table ${APP}.dws_trade_user_order_nd partition(dt='$do_date')
select
    user_id,
    sum(if(dt>=date_add('$do_date',-6),order_count_1d,0)),
    sum(if(dt>=date_add('$do_date',-6),order_num_1d,0)),
    sum(if(dt>=date_add('$do_date',-6),order_original_amount_1d,0)),
    sum(if(dt>=date_add('$do_date',-6),activity_reduce_amount_1d,0)),
    sum(if(dt>=date_add('$do_date',-6),coupon_reduce_amount_1d,0)),
    sum(if(dt>=date_add('$do_date',-6),order_total_amount_1d,0)),
    sum(order_count_1d),
    sum(order_num_1d),
    sum(order_original_amount_1d),
    sum(activity_reduce_amount_1d),
    sum(coupon_reduce_amount_1d),
    sum(order_total_amount_1d)
from ${APP}.dws_trade_user_order_1d
where dt>=date_add('$do_date',-29)
and dt<='$do_date'
group by user_id;
"
dws_trade_user_order_refund_nd="
insert overwrite table ${APP}.dws_trade_user_order_refund_nd partition(dt='$do_date')
select
    user_id,
    sum(if(dt>=date_add('$do_date',-6),order_refund_count_1d,0)),
    sum(if(dt>=date_add('$do_date',-6),order_refund_num_1d,0)),
    sum(if(dt>=date_add('$do_date',-6),order_refund_amount_1d,0)),
    sum(order_refund_count_1d),
    sum(order_refund_num_1d),
    sum(order_refund_amount_1d)
from ${APP}.dws_trade_user_order_refund_1d
where dt>=date_add('$do_date',-29)
and dt<='$do_date'
group by user_id;
"
dws_trade_user_payment_nd="
insert overwrite table ${APP}.dws_trade_user_payment_nd partition (dt = '$do_date')
select user_id,
       sum(if(dt >= date_add('$do_date', -6), payment_count_1d, 0)),
       sum(if(dt >= date_add('$do_date', -6), payment_num_1d, 0)),
       sum(if(dt >= date_add('$do_date', -6), payment_amount_1d, 0)),
       sum(payment_count_1d),
       sum(payment_num_1d),
       sum(payment_amount_1d)
from ${APP}.dws_trade_user_payment_1d
where dt >= date_add('$do_date', -29)
  and dt <= '$do_date'
group by user_id;
"
dws_trade_user_sku_order_nd="
insert overwrite table ${APP}.dws_trade_user_sku_order_nd partition(dt='$do_date')
select
    user_id,
    sku_id,
    sku_name,
    category1_id,
    category1_name,
    category2_id,
    category2_name,
    category3_id,
    category3_name,
    tm_id,
    tm_name,
    sum(if(dt>=date_add('$do_date',-6),order_count_1d,0)),
    sum(if(dt>=date_add('$do_date',-6),order_num_1d,0)),
    sum(if(dt>=date_add('$do_date',-6),order_original_amount_1d,0)),
    sum(if(dt>=date_add('$do_date',-6),activity_reduce_amount_1d,0)),
    sum(if(dt>=date_add('$do_date',-6),coupon_reduce_amount_1d,0)),
    sum(if(dt>=date_add('$do_date',-6),order_total_amount_1d,0)),
    sum(order_count_1d),
    sum(order_num_1d),
    sum(order_original_amount_1d),
    sum(activity_reduce_amount_1d),
    sum(coupon_reduce_amount_1d),
    sum(order_total_amount_1d)
from ${APP}.dws_trade_user_sku_order_1d
where dt>=date_add('$do_date',-30)
group by  user_id,sku_id,sku_name,category1_id,category1_name,category2_id,category2_name,category3_id,category3_name,tm_id,tm_name;
"
dws_trade_user_sku_order_refund_nd="
insert overwrite table ${APP}.dws_trade_user_sku_order_refund_nd partition(dt='$do_date')
select
    user_id,
    sku_id,
    sku_name,
    category1_id,
    category1_name,
    category2_id,
    category2_name,
    category3_id,
    category3_name,
    tm_id,
    tm_name,
    sum(if(dt>=date_add('$do_date',-6),order_refund_count_1d,0)),
    sum(if(dt>=date_add('$do_date',-6),order_refund_num_1d,0)),
    sum(if(dt>=date_add('$do_date',-6),order_refund_amount_1d,0)),
    sum(order_refund_count_1d),
    sum(order_refund_num_1d),
    sum(order_refund_amount_1d)
from ${APP}.dws_trade_user_sku_order_refund_1d
where dt>=date_add('$do_date',-29)
and dt<='$do_date'
group by user_id,sku_id,sku_name,category1_id,category1_name,category2_id,category2_name,category3_id,category3_name,tm_id,tm_name;
"
dws_traffic_page_visitor_page_view_nd="
insert overwrite table ${APP}.dws_traffic_page_visitor_page_view_nd partition(dt='$do_date')
select
    mid_id,
    brand,
    model,
    operate_system,
    page_id,
    sum(if(dt>=date_add('$do_date',-6),during_time_1d,0)),
    sum(if(dt>=date_add('$do_date',-6),view_count_1d,0)),
    sum(during_time_1d),
    sum(view_count_1d)
from ${APP}.dws_traffic_page_visitor_page_view_1d
where dt>=date_add('$do_date',-29)
and dt<='$do_date'
group by mid_id,brand,model,operate_system,page_id;
"

case $1 in
    "dws_trade_activity_order_nd" )
        hive -e "$dws_trade_activity_order_nd"
    ;;
    "dws_trade_coupon_order_nd" )
        hive -e "$dws_trade_coupon_order_nd"
    ;;
    "dws_trade_province_order_nd" )
        hive -e "$dws_trade_province_order_nd"
    ;;
    "dws_trade_user_cart_add_nd" )
        hive -e "$dws_trade_user_cart_add_nd"
    ;;
    "dws_trade_user_order_nd" )
        hive -e "$dws_trade_user_order_nd"
    ;;
    "dws_trade_user_order_refund_nd" )
        hive -e "$dws_trade_user_order_refund_nd"
    ;;
    "dws_trade_user_payment_nd" )
        hive -e "$dws_trade_user_payment_nd"
    ;;
    "dws_trade_user_sku_order_nd" )
        hive -e "$dws_trade_user_sku_order_nd"
    ;;
    "dws_trade_user_sku_order_refund_nd" )
        hive -e "$dws_trade_user_sku_order_refund_nd"
    ;;
    "dws_traffic_page_visitor_page_view_nd" )
        hive -e "$dws_traffic_page_visitor_page_view_nd"
    ;;
    "all" )
        hive -e "$dws_trade_activity_order_nd$dws_trade_coupon_order_nd$dws_trade_province_order_nd$dws_trade_user_cart_add_nd$dws_trade_user_order_nd$dws_trade_user_order_refund_nd$dws_trade_user_payment_nd$dws_trade_user_sku_order_nd$dws_trade_user_sku_order_refund_nd$dws_traffic_page_visitor_page_view_nd"
    ;;
esac

三、DWS历史至今td汇总表

1.首日数据装载脚本

脚本名称:dws_1d_to_dws_td_init.sh
脚本内容:

#!/bin/bash
APP=gmall

if [ -n "$2" ] ;then
   do_date=$2
else 
   echo "请传入日期参数"
   exit
fi

dws_trade_user_order_td="
insert overwrite table ${APP}.dws_trade_user_order_td partition(dt='$do_date')
select
    user_id,
    min(dt) login_date_first,
    max(dt) login_date_last,
    sum(order_count_1d) order_count,
    sum(order_num_1d) order_num,
    sum(order_original_amount_1d) original_amount,
    sum(activity_reduce_amount_1d) activity_reduce_amount,
    sum(coupon_reduce_amount_1d) coupon_reduce_amount,
    sum(order_total_amount_1d) total_amount
from ${APP}.dws_trade_user_order_1d
group by user_id;
"

dws_trade_user_payment_td="
insert overwrite table ${APP}.dws_trade_user_payment_td partition(dt='$do_date')
select
    user_id,
    min(dt) payment_date_first,
    max(dt) payment_date_last,
    sum(payment_count_1d) payment_count,
    sum(payment_num_1d) payment_num,
    sum(payment_amount_1d) payment_amount
from ${APP}.dws_trade_user_payment_1d
group by user_id;
"

dws_user_user_login_td="
insert overwrite table ${APP}.dws_user_user_login_td partition(dt='$do_date')
select
    u.id,
    nvl(login_date_last,date_format(create_time,'yyyy-MM-dd')),
    nvl(login_count_td,1)
from
(
    select
        id,
        create_time
    from ${APP}.dim_user_zip
    where dt='9999-12-31'
)u
left join
(
    select
        user_id,
        max(dt) login_date_last,
        count(*) login_count_td
    from ${APP}.dwd_user_login_inc
    group by user_id
)l
on u.id=l.user_id;
"

case $1 in
    "dws_trade_user_order_td" )
        hive -e "$dws_trade_user_order_td"
    ;;
    "dws_trade_user_payment_td" )
        hive -e "$dws_trade_user_payment_td"
    ;;
    "dws_user_user_login_td" )
        hive -e "$dws_user_user_login_td"
    ;;
    "all" )
        hive -e "$dws_trade_user_order_td$dws_trade_user_payment_td$dws_user_user_login_td"
    ;;
esac

2.每日数据装载脚本

#!/bin/bash
APP=gmall

# 如果输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n "$2" ] ;then
    do_date=$2
else 
    do_date=`date -d "-1 day" +%F`
fi

dws_trade_user_order_td="
insert overwrite table ${APP}.dws_trade_user_order_td partition(dt='$do_date')
select
    nvl(old.user_id,new.user_id),
    if(new.user_id is not null and old.user_id is null,'$do_date',old.order_date_first),
    if(new.user_id is not null,'$do_date',old.order_date_last),
    nvl(old.order_count_td,0)+nvl(new.order_count_1d,0),
    nvl(old.order_num_td,0)+nvl(new.order_num_1d,0),
    nvl(old.original_amount_td,0)+nvl(new.order_original_amount_1d,0),
    nvl(old.activity_reduce_amount_td,0)+nvl(new.activity_reduce_amount_1d,0),
    nvl(old.coupon_reduce_amount_td,0)+nvl(new.coupon_reduce_amount_1d,0),
    nvl(old.total_amount_td,0)+nvl(new.order_total_amount_1d,0)
from
(
    select
        user_id,
        order_date_first,
        order_date_last,
        order_count_td,
        order_num_td,
        original_amount_td,
        activity_reduce_amount_td,
        coupon_reduce_amount_td,
        total_amount_td
    from ${APP}.dws_trade_user_order_td
    where dt=date_add('$do_date',-1)
)old
full outer join
(
    select
        user_id,
        order_count_1d,
        order_num_1d,
        order_original_amount_1d,
        activity_reduce_amount_1d,
        coupon_reduce_amount_1d,
        order_total_amount_1d
    from ${APP}.dws_trade_user_order_1d
    where dt='$do_date'
)new
on old.user_id=new.user_id;
"

dws_trade_user_payment_td="
insert overwrite table ${APP}.dws_trade_user_payment_td partition(dt='$do_date')
select
    nvl(old.user_id,new.user_id),
    if(old.user_id is null and new.user_id is not null,'$do_date',old.payment_date_first),
    if(new.user_id is not null,'$do_date',old.payment_date_last),
    nvl(old.payment_count_td,0)+nvl(new.payment_count_1d,0),
    nvl(old.payment_num_td,0)+nvl(new.payment_num_1d,0),
    nvl(old.payment_amount_td,0)+nvl(new.payment_amount_1d,0)
from
(
    select
        user_id,
        payment_date_first,
        payment_date_last,
        payment_count_td,
        payment_num_td,
        payment_amount_td
    from ${APP}.dws_trade_user_payment_td
    where dt=date_add('$do_date',-1)
)old
full outer join
(
    select
        user_id,
        payment_count_1d,
        payment_num_1d,
        payment_amount_1d
    from ${APP}.dws_trade_user_payment_1d
    where dt='$do_date'
)new
on old.user_id=new.user_id;
"

dws_user_user_login_td="
insert overwrite table ${APP}.dws_user_user_login_td partition(dt='$do_date')
select
    nvl(old.user_id,new.user_id),
    if(new.user_id is null,old.login_date_last,'$do_date'),
    nvl(old.login_count_td,0)+nvl(new.login_count_1d,0)
from
(
    select
        user_id,
        login_date_last,
        login_count_td
    from ${APP}.dws_user_user_login_td
    where dt=date_add('$do_date',-1)
)old
full outer join
(
    select
        user_id,
        count(*) login_count_1d
    from ${APP}.dwd_user_login_inc
    where dt='$do_date'
    group by user_id
)new
on old.user_id=new.user_id;
"

case $1 in
    "dws_trade_user_order_td" )
        hive -e "$dws_trade_user_order_td"
    ;;
    "dws_trade_user_payment_td" )
        hive -e "$dws_trade_user_payment_td"
    ;;
    "dws_user_user_login_td" )
        hive -e "$dws_user_user_login_td"
    ;;
    "all" )
        hive -e "$dws_trade_user_order_td$dws_trade_user_payment_td$dws_user_user_login_td"
    ;;
esac
### 离线数据仓库 DWD 的作用及功能介绍 #### 1. 数据清洗与结构化 DWD(Data Warehouse Detail)是离线数据仓库中承上启下的重要次,其主要作用是对 ODS(Operational Data Store)中的原始数据进行清洗、转换和结构化处理[^1]。通过去除无效数据、修正错误数据以及补充缺失字段,DWD 能够为后续的数据分析提供高质量的基础数据支持。 #### 2. 数据去重与一致性保证 在数据装载过程中,DWD 会确保数据的一致性和完整性。例如,在购物车周期快照事实表的实现中,通过过滤条件 `is_ordered='0'` 和指定期分区 `dt='2020-06-14'`,可以有效避免重复数据的加载,并保持数据的时间维度一致性。 #### 3. 数据分区与存储优化 为了提高查询效率和降低存储成本,DWD 通常会对数据进行分区管理,例如按照期字段 `dt` 进行分区。此外,DWD 还会采用压缩技术(如 LZO 压缩算法)来减少存储空间占用,同时保持较高的查询性能[^2]。 #### 4. 数据模型规范化 DWD 的设计遵循严格的数据建模规范,旨在为后续的 DWS(Data Warehouse Summary)和 ADS(Application Data Service)提供清晰、一致的数据结构[^2]。例如,在流量域页面浏览事务事实表的实现中,DWD 会定义详细的建表语句,并明确各字段的含义和用途[^3]。 #### 5. 数据生命周期管理 DWD 数据通常具有较长的生命周期,可能保留多年甚至永久保存。这种设计是为了满足历史数据分析的需求,同时也便于追溯数据来源和变化过程。 #### 6. 支持多维度分析 DWD 通过对不同业务域(如交易域、流量域等)的事实表进行建模,能够为多维度分析提供丰富的数据支持。例如,流量域中的页面浏览、启动、动作、曝光和错误等事务事实表分别记录了用户行为的不同方面,从而为精细化运营提供了数据基础[^3]。 ```sql -- 示例:DWD 数据装载语句 insert overwrite table dwd_trade_cart_full partition(dt='2020-06-14') select id, user_id, sku_id, sku_name, sku_num from ods_cart_info_full where dt='2020-06-14' and is_ordered='0'; ``` --- ###
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值