①基于用户SparkALS数据源
数据源来自数仓平台,shopcar表记录登录用户购物车及商品信息;collect表记录登录用户收藏记录,收藏类型(1商品 2品牌);browse表记录登录用户浏览记录,收藏类型(1 商品 2品牌);订单状态来自主订单表和产品纬度表;浏览和深度浏览来自hbase的growingio_custom_event表
制定用户评分打分规则
浏览 1分
深度浏览 2分(浏览时间超过30s)
浏览次数 1分(超过2次2分)
收藏 2分
加购物车 3分
下单 4分
支付 5分
浏览,深度浏览和浏览次数的权重为0.7;收藏,加购物车,下单和支付的权重是0.3。
核心代码如下:
-- 商品详情页面停留时间打分表,数据来源是hbase,此处作映射表拿到数据
INSERT OVERWRITE TABLE bi_dw.growingio_custom_event_staytime
SELECT
t.product_id,
t.loginuserid,
t.stay_time,
CASE WHEN t.stay_time is NULL THEN 0
WHEN t.stay_time=0 THEN 0
WHEN t.stay_time>0 AND t.stay_time<30 THEN 1
ELSE 3
END AS rating
FROM
(SELECT
t.product_id,
t.loginuserid,
CASE WHEN stay_time1 is NULL OR stay_time1='' THEN stay_time2
ELSE stay_time1
END stay_time
FROM
(SELECT
regexp_extract(regexp_extract(t.c11,'(\\"+[0-9]+\\")',0),'([0-9]+)',0) AS product_id,
regexp_extract(t.c12,'([0-9]+)',0) AS loginuserid,
regexp_extract(regexp_extract(t.c11,'\\"+stay_time+\\"+(\\:+\\"+[0-9]+\\")',0),'([0-9]+)',0) AS stay_time1,
regexp_extract(regexp_extract(t.c11,'\\"+stay_time+\\"+(\\:+[0-9]+\\.+[0-9]+)',0),'([0-9]+\\.+[0-9])',0) AS stay_time2
FROM bi_dw.growingio_custom_event t WHERE t.c9='ProductDetailUserStayTime')t
)t
;
-- 用户评分表
set hive.exec.parallel=true;
INSERT OVERWRITE TABLE bi_ads.user_rating
SELECT
t.userid AS user_id, -- 用户id
t.objectid AS product_id, -- 商品id
((t.rating1 + t.rating2)/2*0.7 + (t.rating3 + t.rating4 + t.rating5)/3*0.3) AS rating, -- 总评分,0.7和0.3是权重
t.time -- 时间
FROM
(SELECT
t1.userid,
t1.objectid,
t1.addtime AS time,
t1.rating AS rating1, -- 浏览次数评分
t1.view_num,
t2.stay_time,
t3.product_id AS collction,
t6.add_shopcar_num,
CASE WHEN t2.rating is NULL THEN 0
ELSE t2.rating
END AS rating2, -- 商品详情页面停留时长即浏览时间评分
CASE WHEN t3.rating is NULL THEN 0
ELSE t3.ratin