hive电商项目:3会员主题

本文详细介绍使用Hive SQL构建数据仓库的过程,包括DWD、DM层数据的创建与分析,涉及会员、浏览器及地域主题的数据处理。通过左连接、分组及聚合函数实现数据的整合与计算,为业务决策提供数据支持。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

create table if not exists dwd_mem(
pl string,
pl_id string,
en string,
en_id string,
browser_name string,
browser_id string,
browser_version string,
browser_version_id string,
province_name string,
province_id string,
city_name string,
city_id string,
u_mid string
)
partitioned by (month string,day string)
row format delimited fields terminated by '\u0001'
stored as orc
;

from (
select
ol.pl,
dp.id as pl_id,
ol.en,
de.id as en_id,
ol.browser_name,
db.id browser_id,
ol.browser_version ,
dv.id browser_version_id,
ol.province ,
pv.id province_id,
ol.city ,
dc.id city_id,
ol.u_mid
from ods.ods_logs_orc ol
left join dim.dim_platform dp on dp.platform_name = ol.pl
left join dim.dim_event_name de on de.name = ol.en
left join dim.dim_browser_name db on db.browser_name = ol.browser_name
left join dim.dim_browser_version dv on dv.browser_version = ol.browser_version
left join dim.dim_province pv on pv.province = ol.province
left join dim.dim_city dc on dc.city = ol.city
) tmp
insert into dwd_mem partition (month='1',day = '01')
select *
;

1、会员主题下新增会员、总会员、活跃会员

创建dm层的数据:
#会员主题下新增用户、新增总用户、日活跃用户
create table if not exists dm_mem_mems(
pl string,
pl_id string,
new_mem_count int,
new_total_mem_count int,
active_mem_count int
)
partitioned by (month string,day string)
row format delimited fields terminated by '\u0001'
stored as orc
;

导入数据:
from (
select 
mem.pl,
mem.pl_id,
count(distinct mem.u_mid) as new_mem_count,
count(distinct mem.u_mid) + nvl(mm.new_total_mem_count,0) as new_total_mem_count,
0 as active_mem_count
from dwd.dwd_mem mem
left join dm_mem_mems mm on mm.month = '1' and mm.day = '00' and mm.pl_id = mem.pl_id
where mem.month = 1 and mem.day = '01' and mem.en_id = 1
group by mem.pl,mem.pl_id,mm.new_total_mem_count
union all select
mem.pl,
mem.pl_id,
0 as new_mem_count,
0 as new_total_mem_count,
count(distinct mem.u_mid) as active_mem_count
from dwd.dwd_mem mem
where mem.month = 1 and mem.day ='01'
group by mem.pl,mem.pl_id)tmp
insert into dm_mem_mems partition(month = 1,day ='01')
select tmp.pl,tmp.pl_id,
sum(tmp.new_mem_count) new_mem_count,
sum(tmp.new_total_mem_count) new_total_mem_count,
sum(tmp.active_mem_count) active_mem_count
group by tmp.pl,tmp.pl_id
;
输出
hive (dm)> select * from dm_user_users;
OK
dm_user_users.pl        dm_user_users.pl_id     dm_user_users.new_user_count    dm_user_users.new_total_user_count      dm_user_users.active_user_count  dm_user_users.month     dm_user_users.day
java    NULL    0       0       1       1       01
java_server     1       0       0       2       1       01
website 2       5       5       8       1       01


2、浏览器主题下新增会员、总会员、活跃会员

create table if not exists dm_browser_mems(
pl string,
pl_id string,
browser_name string,
browser_id string,
browser_version string,
browser_version_id string,
new_mem_count int,
new_total_mem_count int,
active_mem_count int
)
partitioned by (month string,day string)
row format delimited fields terminated by '\u0001'
stored as orc
;

#分析:平台和浏览器有关,因为有平台相关字段
#browser_version_id和浏览器类型无关
from (
select 
mem.pl,
mem.pl_id,
mem.browser_name,
mem.browser_id,
mem.browser_version,
mem.browser_version_id,
count(distinct mem.u_mid) as new_mem_count,
count(distinct mem.u_mid) + nvl(bm.new_total_mem_count,0) as new_total_mem_count,
0 as active_mem_count
from dwd.dwd_mem mem
left join dm_browser_mems bm on bm.month = '1' and bm.day = '00' and bm.pl_id=mem.pl_id and 
bm.browser_id = mem.browser_id and 
bm.browser_version_id = mem.browser_version_id 
where mem.month = 1 and mem.day = '01' and mem.en_id =1
group by mem.pl,mem.pl_id,mem.browser_name,mem.browser_id,mem.browser_version,mem.browser_version_id,bm.new_total_mem_count
union all select 
mem.pl,
mem.pl_id,
mem.browser_name,
mem.browser_id,
mem.browser_version,
mem.browser_version_id,
0 as new_mem_count,
0 as new_total_mem_count,
count(distinct mem.u_mid) as active_mem_count
from dwd.dwd_mem mem
where mem.month = 1 and mem.day = '01'
group by mem.pl,mem.pl_id,mem.browser_name,mem.browser_id,mem.browser_version,mem.browser_version_id)tmp
insert into dm_browser_mems partition (month = 1,day ='01')
select 
tmp.pl,
tmp.pl_id,
tmp.browser_name,
tmp.browser_id,
tmp.browser_version,
tmp.browser_version_id,
sum(tmp.new_mem_count) new_mem_count,
sum(tmp.new_total_mem_count) new_total_mem_count,
sum(tmp.active_mem_count) active_mem_count
group by tmp.pl,tmp.pl_id,tmp.browser_name,tmp.browser_id,tmp.browser_version,tmp.browser_version_id;

输出:
hive (dm)> select * from dm_browser_mems;
OK
dm_browser_mems.pl      dm_browser_mems.pl_id   dm_browser_mems.browser_name    dm_browser_mems.browser_id      dm_browser_mems.b                         rowser_version  dm_browser_mems.browser_version_id      dm_browser_mems.new_mem_count   dm_browser_mems.new_total_mem_count     d                         m_browser_mems.active_mem_count dm_browser_mems.month   dm_browser_mems.day
java    NULL    Chrome  4       31.0.1650.63    4       0       0       1       1       01
java_server     1       Chrome  4       31.0.1650.63    4       0       0       2       1       01
java_server     1       null    3       null    3       0       0       2       1       01
website 2       Chrome  4       31.0.1650.63    4       0       0       1       1       01
website 2       Chrome  4       47.0.2526.106   NULL    1       1       1       1       01
website 2       Chrome  4       70.0.3538.77    NULL    1       1       1       1       01
website 2       Firefox NULL    63.0    NULL    0       0       1       1       01
website 2       IE      1       8.0     1       1       1       1       1       01
website 2       Sogou Explorer  2       2.X     2       1       1       1       1       01


3、地域主题下的活跃会员
create table if not exists dm_area_mems(
pl string,
pl_id string,
province string,
province_id string,
city string,
city_id string,
active_user_count int
)
partitioned by (month string,day string)
row format delimited fields terminated by '\u0001'
stored as orc
;

from (
select 
mem.pl,
mem.pl_id,
mem.province_name,
mem.province_id,
mem.city_name,
mem.city_id,
count(distinct mem.u_mid) as active_mem
from dwd.dwd_mem mem
where mem.month = 1 and mem.day = '01'
group by mem.pl,mem.pl_id,mem.province_name,mem.province_id,mem.city_name,mem.city_id)tmp
insert into dm_area_mems partition (month = 1,day ='01')
select 
tmp.pl,
tmp.pl_id,
tmp.province_name,
tmp.province_id,
tmp.city_name,
tmp.city_id,
tmp.active_mem
group by tmp.pl,tmp.pl_id,tmp.province_name,tmp.province_id,tmp.city_name,tmp.city_id,tmp.active_mem;

输出:
hive (dm)> select * from dm_area_mems;
OK
dm_area_mems.pl dm_area_mems.pl_id      dm_area_mems.province   dm_area_mems.province_id        dm_area_mems.city       dm_area_mems.city_id     dm_area_mems.active_user_count  dm_area_mems.month      dm_area_mems.day
java    NULL    北京市  2002    昌平区  NULL    1       1       01
java_server     1       广西省  NULL    广西南宁市      NULL    2       1       01
java_server     1       贵州省  2025    贵阳市  4266    2       1       01
website 2       北京市  2002    昌平区  NULL    1       1       01
website 2       广西省  NULL    广西南宁市      NULL    1       1       01
website 2       河南省  2017    平顶山市        4109    1       1       01
website 2       贵州省  2025    贵阳市  4266    1       1       01
website 2       贵州省  2025    黔西南州兴义市  NULL    1       1       01

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值