create table if not exists dwd_mem(
pl string,
pl_id string,
en string,
en_id string,
browser_name string,
browser_id string,
browser_version string,
browser_version_id string,
province_name string,
province_id string,
city_name string,
city_id string,
u_mid string
)
partitioned by (month string,day string)
row format delimited fields terminated by '\u0001'
stored as orc
;
from (
select
ol.pl,
dp.id as pl_id,
ol.en,
de.id as en_id,
ol.browser_name,
db.id browser_id,
ol.browser_version ,
dv.id browser_version_id,
ol.province ,
pv.id province_id,
ol.city ,
dc.id city_id,
ol.u_mid
from ods.ods_logs_orc ol
left join dim.dim_platform dp on dp.platform_name = ol.pl
left join dim.dim_event_name de on de.name = ol.en
left join dim.dim_browser_name db on db.browser_name = ol.browser_name
left join dim.dim_browser_version dv on dv.browser_version = ol.browser_version
left join dim.dim_province pv on pv.province = ol.province
left join dim.dim_city dc on dc.city = ol.city
) tmp
insert into dwd_mem partition (month='1',day = '01')
select *
;
1、会员主题下新增会员、总会员、活跃会员
创建dm层的数据:
#会员主题下新增用户、新增总用户、日活跃用户
create table if not exists dm_mem_mems(
pl string,
pl_id string,
new_mem_count int,
new_total_mem_count int,
active_mem_count int
)
partitioned by (month string,day string)
row format delimited fields terminated by '\u0001'
stored as orc
;
导入数据:
from (
select
mem.pl,
mem.pl_id,
count(distinct mem.u_mid) as new_mem_count,
count(distinct mem.u_mid) + nvl(mm.new_total_mem_count,0) as new_total_mem_count,
0 as active_mem_count
from dwd.dwd_mem mem
left join dm_mem_mems mm on mm.month = '1' and mm.day = '00' and mm.pl_id = mem.pl_id
where mem.month = 1 and mem.day = '01' and mem.en_id = 1
group by mem.pl,mem.pl_id,mm.new_total_mem_count
union all select
mem.pl,
mem.pl_id,
0 as new_mem_count,
0 as new_total_mem_count,
count(distinct mem.u_mid) as active_mem_count
from dwd.dwd_mem mem
where mem.month = 1 and mem.day ='01'
group by mem.pl,mem.pl_id)tmp
insert into dm_mem_mems partition(month = 1,day ='01')
select tmp.pl,tmp.pl_id,
sum(tmp.new_mem_count) new_mem_count,
sum(tmp.new_total_mem_count) new_total_mem_count,
sum(tmp.active_mem_count) active_mem_count
group by tmp.pl,tmp.pl_id
;
输出
hive (dm)> select * from dm_user_users;
OK
dm_user_users.pl dm_user_users.pl_id dm_user_users.new_user_count dm_user_users.new_total_user_count dm_user_users.active_user_count dm_user_users.month dm_user_users.day
java NULL 0 0 1 1 01
java_server 1 0 0 2 1 01
website 2 5 5 8 1 01
2、浏览器主题下新增会员、总会员、活跃会员
create table if not exists dm_browser_mems(
pl string,
pl_id string,
browser_name string,
browser_id string,
browser_version string,
browser_version_id string,
new_mem_count int,
new_total_mem_count int,
active_mem_count int
)
partitioned by (month string,day string)
row format delimited fields terminated by '\u0001'
stored as orc
;
#分析:平台和浏览器有关,因为有平台相关字段
#browser_version_id和浏览器类型无关
from (
select
mem.pl,
mem.pl_id,
mem.browser_name,
mem.browser_id,
mem.browser_version,
mem.browser_version_id,
count(distinct mem.u_mid) as new_mem_count,
count(distinct mem.u_mid) + nvl(bm.new_total_mem_count,0) as new_total_mem_count,
0 as active_mem_count
from dwd.dwd_mem mem
left join dm_browser_mems bm on bm.month = '1' and bm.day = '00' and bm.pl_id=mem.pl_id and
bm.browser_id = mem.browser_id and
bm.browser_version_id = mem.browser_version_id
where mem.month = 1 and mem.day = '01' and mem.en_id =1
group by mem.pl,mem.pl_id,mem.browser_name,mem.browser_id,mem.browser_version,mem.browser_version_id,bm.new_total_mem_count
union all select
mem.pl,
mem.pl_id,
mem.browser_name,
mem.browser_id,
mem.browser_version,
mem.browser_version_id,
0 as new_mem_count,
0 as new_total_mem_count,
count(distinct mem.u_mid) as active_mem_count
from dwd.dwd_mem mem
where mem.month = 1 and mem.day = '01'
group by mem.pl,mem.pl_id,mem.browser_name,mem.browser_id,mem.browser_version,mem.browser_version_id)tmp
insert into dm_browser_mems partition (month = 1,day ='01')
select
tmp.pl,
tmp.pl_id,
tmp.browser_name,
tmp.browser_id,
tmp.browser_version,
tmp.browser_version_id,
sum(tmp.new_mem_count) new_mem_count,
sum(tmp.new_total_mem_count) new_total_mem_count,
sum(tmp.active_mem_count) active_mem_count
group by tmp.pl,tmp.pl_id,tmp.browser_name,tmp.browser_id,tmp.browser_version,tmp.browser_version_id;
输出:
hive (dm)> select * from dm_browser_mems;
OK
dm_browser_mems.pl dm_browser_mems.pl_id dm_browser_mems.browser_name dm_browser_mems.browser_id dm_browser_mems.b rowser_version dm_browser_mems.browser_version_id dm_browser_mems.new_mem_count dm_browser_mems.new_total_mem_count d m_browser_mems.active_mem_count dm_browser_mems.month dm_browser_mems.day
java NULL Chrome 4 31.0.1650.63 4 0 0 1 1 01
java_server 1 Chrome 4 31.0.1650.63 4 0 0 2 1 01
java_server 1 null 3 null 3 0 0 2 1 01
website 2 Chrome 4 31.0.1650.63 4 0 0 1 1 01
website 2 Chrome 4 47.0.2526.106 NULL 1 1 1 1 01
website 2 Chrome 4 70.0.3538.77 NULL 1 1 1 1 01
website 2 Firefox NULL 63.0 NULL 0 0 1 1 01
website 2 IE 1 8.0 1 1 1 1 1 01
website 2 Sogou Explorer 2 2.X 2 1 1 1 1 01
3、地域主题下的活跃会员
create table if not exists dm_area_mems(
pl string,
pl_id string,
province string,
province_id string,
city string,
city_id string,
active_user_count int
)
partitioned by (month string,day string)
row format delimited fields terminated by '\u0001'
stored as orc
;
from (
select
mem.pl,
mem.pl_id,
mem.province_name,
mem.province_id,
mem.city_name,
mem.city_id,
count(distinct mem.u_mid) as active_mem
from dwd.dwd_mem mem
where mem.month = 1 and mem.day = '01'
group by mem.pl,mem.pl_id,mem.province_name,mem.province_id,mem.city_name,mem.city_id)tmp
insert into dm_area_mems partition (month = 1,day ='01')
select
tmp.pl,
tmp.pl_id,
tmp.province_name,
tmp.province_id,
tmp.city_name,
tmp.city_id,
tmp.active_mem
group by tmp.pl,tmp.pl_id,tmp.province_name,tmp.province_id,tmp.city_name,tmp.city_id,tmp.active_mem;
输出:
hive (dm)> select * from dm_area_mems;
OK
dm_area_mems.pl dm_area_mems.pl_id dm_area_mems.province dm_area_mems.province_id dm_area_mems.city dm_area_mems.city_id dm_area_mems.active_user_count dm_area_mems.month dm_area_mems.day
java NULL 北京市 2002 昌平区 NULL 1 1 01
java_server 1 广西省 NULL 广西南宁市 NULL 2 1 01
java_server 1 贵州省 2025 贵阳市 4266 2 1 01
website 2 北京市 2002 昌平区 NULL 1 1 01
website 2 广西省 NULL 广西南宁市 NULL 1 1 01
website 2 河南省 2017 平顶山市 4109 1 1 01
website 2 贵州省 2025 贵阳市 4266 1 1 01
website 2 贵州省 2025 黔西南州兴义市 NULL 1 1 01