Hive 3道面试题练习(包含详细解题过程)

本文探讨了如何使用SQL处理用户访问数据,包括按月统计访问次数、累计访问次数,以及分析京东店铺访客数量和高频访客。此外,还介绍了蚂蚁金服低碳生活项目的数据统计方法,涵盖植物申领统计和低碳用户排名分析。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

1.

我们有如下的用户访问数据

userIdvisitDatevisitCount
u012017/1/215
u022017/1/236
u032017/1/228
u042017/1/203
u012017/1/236
u012017/2/218
u022017/1/236
u012017/2/224

要求使用SQL统计出每个用户的累积访问次数,如下表所示:

用户id月份小计累积
u012017-011111
u012017-021223
u022017-011212
u032017-0188
u042017-0133

数据:

u01 2017/1/21 5

u02 2017/1/23 6

u03 2017/1/22 8

u04 2017/1/20 3

u01 2017/1/23 6

u01 2017/2/21 8

u02 2017/1/23 6

u01 2017/2/22 4

1)创建表

create table action
(userId string,
visitDate string,
visitCount int) 
row format delimited fields terminated by "\t";

load data local inpath '/opt/module/hive/datas/action' into table action

思路一:

a. 先处理日期格式

SELECT userId,  
DATE_FORMAT(regexp_replace(visitDate,"/","-"),'yyyy-MM') visitDate , visitCount
FROM ACTION    ==>t1
 

b. 根据用户id和月份分组,求小计
SELECT t1.userId, t1.visitDate, SUM(visitCount) xj
FROM t1 
GROUP BY userId,visitDate   ==>t2

组合:
SELECT t1.userId, t1.visitDate, SUM(visitCount) xj
FROM (SELECT userId,  
DATE_FORMAT(regexp_replace(visitDate,"/","-"),'yyyy-MM') visitDate , visitCount
FROM ACTION)t1 
GROUP BY userId,visitDate

+------------+---------------+-----+--+
| t1.userid  | t1.visitdate  | xj  |
+------------+---------------+-----+--+
| u01        | 2017-01       | 11  |
| u01        | 2017-02       | 12  |
| u02        | 2017-01       | 12  |
| u03        | 2017-01       | 8   |
| u04        | 2017-01       | 3   |
+------------+---------------+-----+--+


c. 开窗,通过userId分区,visitData排序, 求累计

SELECT t2.userId,t2.visitDate, t2.xj ,  
SUM(t2.xj) over(PARTITION BY t2.userId ORDER BY t2.visitDate) lj
FROM t2 


组合:
SELECT t2.userId,t2.visitDate, t2.xj ,  
SUM(t2.xj) over(PARTITION BY t2.userId ORDER BY t2.visitDate) lj
FROM (SELECT t1.userId, t1.visitDate, SUM(visitCount) xj
FROM (SELECT userId,  
DATE_FORMAT(regexp_replace(visitDate,"/","-"),'yyyy-MM') visitDate , visitCount
FROM ACTION)t1 
GROUP BY userId,visitDate)t2 

+------------+---------------+--------+-----+--+
| t2.userid  | t2.visitdate  | t2.xj  | lj  |
+------------+---------------+--------+-----+--+
| u01        | 2017-01       | 11     | 11  |
| u01        | 2017-02       | 12     | 23  |
| u02        | 2017-01       | 12     | 12  |
| u03        | 2017-01       | 8      | 8   |
| u04        | 2017-01       | 3      | 3   |
+------------+---------------+--------+-----+--+

思路二:

SELECT tmp.userId, tmp.mon,
sum(visitCount) over(PARTITION by tmp.userid,tmp.mon ORDER BY tmp.mon) s_m
from(
SELECT userId, SUBSTR(regexp_replace(visitDate, '(\/)', '-'),0,6) mon,visitCount
from action
 ) tmp
 +-------------+----------+------+
| tmp.userid  | tmp.mon  | s_m  |
+-------------+----------+------+
| u01         | 2017-1   | 11   |
| u01         | 2017-1   | 11   |
| u01         | 2017-2   | 12   |
| u01         | 2017-2   | 12   |
| u02         | 2017-1   | 12   |
| u02         | 2017-1   | 12   |
| u03         | 2017-1   | 8    |
| u04         | 2017-1   | 3    |
+-------------+----------+------+

SELECT DISTINCT tmp.userId, tmp.mon,
sum(visitCount) over(PARTITION by tmp.userid,tmp.mon ORDER BY tmp.mon) s_m
from(
SELECT userId, SUBSTR(regexp_replace(visitDate, '(\/)', '-'),0,6) mon,visitCount
from action
 ) tmp ;
+-------------+----------+------+
| tmp.userid  | tmp.mon  | s_m  |
+-------------+----------+------+
| u01         | 2017-1   | 11   |
| u01         | 2017-2   | 12   |
| u02         | 2017-1   | 12   |
| u03         | 2017-1   | 8    |
| u04         | 2017-1   | 3    |
+-------------+----------+------+

SELECT DISTINCT tmp.userId, tmp.mon,
sum(visitCount) over(PARTITION by tmp.userid,tmp.mon ORDER BY tmp.mon) s_m,
sum(visitCount) over(PARTITION by userid ORDER BY mon) s_t
from(
SELECT userId, SUBSTR(regexp_replace(visitDate, '(\/)', '-'),0,6) mon,visitCount
from action
 ) tmp ;
+-------------+----------+------+------+
| tmp.userid  | tmp.mon  | s_m  | s_t  |
+-------------+----------+------+------+
| u01         | 2017-1   | 11   | 11   |
| u01         | 2017-2   | 12   | 23   |
| u02         | 2017-1   | 12   | 12   |
| u03         | 2017-1   | 8    | 8    |
| u04         | 2017-1   | 3    | 3    |
+-------------+----------+------+------+
5 rows selected (5.437 seconds)

SELECT DISTINCT userId, mon,
sum(visitCount) over(PARTITION by userid,mon ORDER BY mon) s_m,
sum(visitCount) over(PARTITION by userid ORDER BY mon) s_t
from(
SELECT userId, SUBSTR(regexp_replace(visitDate, '(\/)', '-'),0,6) mon,visitCount
from action
 ) tmp ;
+---------+---------+------+------+
| userid  |   mon   | s_m  | s_t  |
+---------+---------+------+------+
| u01     | 2017-1  | 11   | 11   |
| u01     | 2017-2  | 12   | 23   |
| u02     | 2017-1  | 12   | 12   |
| u03     | 2017-1  | 8    | 8    |
| u04     | 2017-1  | 3    | 3    |
+---------+---------+------+------+

思路三:

select userid, Date_format(regexp_replace(visitdate,"/","-"),"yyyy-MM") visitdate,sum(visitcount) visitcount
from action
group by visitdate,userid
+---------+------------+-------------+
| userid  | visitdate  | visitcount  |
+---------+------------+-------------+
| u01     | 2017-01    | 5           |
| u01     | 2017-01    | 6           |
| u01     | 2017-02    | 8           |
| u01     | 2017-02    | 4           |
| u02     | 2017-01    | 12          |
| u03     | 2017-01    | 8           |
| u04     | 2017-01    | 3           |
+---------+------------+-------------+

select userid,visitdate,sum(visitcount) sum_count
from(
select userid, Date_format(regexp_replace(visitdate,"/","-"),"yyyy-MM") visitdate,sum(visitcount) visitcount
from action
group by visitdate,userid
)t1
group by visitdate,userid
+---------+------------+------------+
| userid  | visitdate  | sum_count  |
+---------+------------+------------+
| u01     | 2017-01    | 11         |
| u02     | 2017-01    | 12         |
| u03     | 2017-01    | 8          |
| u04     | 2017-01    | 3          |
| u01     | 2017-02    | 12         |
+---------+------------+------------+

select  userid,visitdate,sum_count,
sum(sum_count) over(partition by userid order by visitdate) lj
from(
select userid,visitdate,sum(visitcount) sum_count
from(
select userid, Date_format(regexp_replace(visitdate,"/","-"),"yyyy-MM") visitdate,sum(visitcount) visitcount
from action
group by visitdate,userid
)t1
group by visitdate,userid
)t2
+---------+------------+------------+-----+
| userid  | visitdate  | sum_count  | lj  |
+---------+------------+------------+-----+
| u01     | 2017-01    | 11         | 11  |
| u01     | 2017-02    | 12         | 23  |
| u02     | 2017-01    | 12         | 12  |
| u03     | 2017-01    | 8          | 8   |
| u04     | 2017-01    | 3          | 3   |
+---------+------------+------------+-----+
5 rows selected (4.88 seconds)

2.京东

有50W个京东店铺,每个顾客访问任何一个店铺的任何一个商品时都会产生一条访问日志,访问日志存储的表名为visit,访客的用户id为user_id,被访问的店铺名称为shop,请统计:

u1 a

u2 b

u1 b

u1 a

u3 c

u4 b

u1 a

u2 c

u5 b

u4 b

u6 c

u2 c

u1 b

u2 a

u2 a

u3 a

u5 a

u5 a

u5 a

建表:

create table visit(user_id string,shop string) row format delimited fields terminated by ‘\t’;

1)每个店铺的UV(访客数)

--每个店铺的UV(访客数)
--思路1:不去重
select shop,count(user_id)
from visit
group by shop
+-------+------+
| shop  | _c1  |
+-------+------+
| a     | 9    |
| b     | 6    |
| c     | 4    |
+-------+------+
--思路2:去重
SELECT shop ,COUNT(DISTINCT user_id) uv 
FROM visit
GROUP BY shop 
+-------+-----+
| shop  | uv  |
+-------+-----+
| a     | 4   |
| b     | 4   |
| c     | 3   |
+-------+-----+

2)每个店铺访问次数top3的访客信息。输出店铺名称、访客id、访问次数

方法一:

--每个店铺访问次数top3的访客信息。输出店铺名称、访客id、访问次数

select shop,user_id,count(*) c_u
from visit
group by shop,user_id
order by shop,c_u desc
+-------+----------+------+
| shop  | user_id  | c_u  |
+-------+----------+------+
| a     | u1       | 3    |
| a     | u5       | 3    |
| a     | u2       | 2    |
| a     | u3       | 1    |
| b     | u1       | 2    |
| b     | u4       | 2    |
| b     | u2       | 1    |
| b     | u5       | 1    |
| c     | u2       | 2    |
| c     | u3       | 1    |
| c     | u6       | 1    |
+-------+----------+------+


select shop,user_id,c_u,
rank() over(partition by shop order by c_u desc) rs
from (
select shop,user_id,count(*) c_u
from visit
group by shop,user_id
)tmp_1

+-------+----------+------+-----+
| shop  | user_id  | c_u  | rs  |
+-------+----------+------+-----+
| a     | u1       | 3    | 1   |
| a     | u5       | 3    | 1   |
| a     | u2       | 2    | 3   |
| a     | u3       | 1    | 4   |
| b     | u1       | 2    | 1   |
| b     | u4       | 2    | 1   |
| b     | u2       | 1    | 3   |
| b     | u5       | 1    | 3   |
| c     | u2       | 2    | 1   |
| c     | u3       | 1    | 2   |
| c     | u6       | 1    | 2   |
+-------+----------+------+-----+

select tmp_3.s_u,tmp_3.t,tmp_3.rt
from(
select tmp_2.s_u,tmp_2.t,
rank() over(partition by tmp_2.shop order by tmp_2.t desc) rt
from (
select tmp_1.shop,tmp_1.s_u,count(s_u) t
from (
select shop,concat_ws("_",shop,user_id) s_u
from visit
) tmp_1
group by tmp_1.s_u,tmp_1.shop
) tmp_2
)tmp_3
where tmp_3.rt<=3;
+-------+----------+------+-----+
| shop  | user_id  | c_u  | rs  |
+-------+----------+------+-----+
| a     | u5       | 3    | 1   |
| a     | u1       | 3    | 1   |
| a     | u2       | 2    | 3   |
| b     | u4       | 2    | 1   |
| b     | u1       | 2    | 1   |
| b     | u5       | 1    | 3   |
| b     | u2       | 1    | 3   |
| c     | u2       | 2    | 1   |
| c     | u6       | 1    | 2   |
| c     | u3       | 1    | 2   |
+-------+----------+------+-----+
10 rows selected (5.017 seconds)

方法二:

select concat_ws("_",shop,user_id) 
from visit
+-------+
|  _c0  |
+-------+
| a_u1  |
| b_u2  |
| b_u1  |
| a_u1  |
| c_u3  |
| b_u4  |
| a_u1  |
| c_u2  |
| b_u5  |
| b_u4  |
| c_u6  |
| c_u2  |
| b_u1  |
| a_u2  |
| a_u2  |
| a_u3  |
| a_u5  |
| a_u5  |
| a_u5  |
+-------+
select shop,s_u,count(s_u) t
from (
select shop,concat_ws("_",shop,user_id) s_u
from visit
) tmp_1
group by s_u,shop

+-------+-------+----+
| shop  |  s_u  | t  |
+-------+-------+----+
| a     | a_u1  | 3  |
| a     | a_u2  | 2  |
| a     | a_u3  | 1  |
| a     | a_u5  | 3  |
| b     | b_u1  | 2  |
| b     | b_u2  | 1  |
| b     | b_u4  | 2  |
| b     | b_u5  | 1  |
| c     | c_u2  | 2  |
| c     | c_u3  | 1  |
| c     | c_u6  | 1  |
+-------+-------+----+
select *
from(
select tmp_2.s_u,tmp_2.t,
rank() over(partition by tmp_2.shop order by tmp_2.t desc) rt
from (
select tmp_1.shop,tmp_1.s_u,count(s_u) t
from (
select shop,concat_ws("_",shop,user_id) s_u
from visit
) tmp_1
group by tmp_1.s_u,tmp_1.shop
) tmp_2
)tmp_3
where tmp_3.rt<=3;
+------------+----------+-----------+
| tmp_3.s_u  | tmp_3.t  | tmp_3.rt  |
+------------+----------+-----------+
| a_u5       | 3        | 1         |
| a_u1       | 3        | 1         |
| a_u2       | 2        | 3         |
| b_u4       | 2        | 1         |
| b_u1       | 2        | 1         |
| b_u5       | 1        | 3         |
| b_u2       | 1        | 3         |
| c_u2       | 2        | 1         |
| c_u6       | 1        | 2         |
| c_u3       | 1        | 2         |
+------------+----------+-----------+
10 rows selected (6.32 seconds)

3.蚂蚁金服

以下表记录了用户每天的蚂蚁森林低碳生活领取的记录流水。

table_name:user_low_carbon

user_id data_dt low_carbon

用户 日期 减少碳排放(g)

蚂蚁森林植物换购表,用于记录申领环保植物所需要减少的碳排放量

table_name: plant_carbon

plant_id plant_name low_carbon

植物编号 植物名 换购植物所需要的碳

----题目

1.蚂蚁森林植物申领统计

问题:假设2017年1月1日开始记录低碳数据(user_low_carbon),假设2017年10月1日之前满足申领条件的用户都申领了一颗p004-胡杨,

剩余的能量全部用来领取“p002-沙柳” 。

统计在10月1日累计申领“p002-沙柳” 排名前10的用户信息;以及他比后一名多领了几颗沙柳。

得到的统计结果如下表样式:

user_id plant_count less_count(比后一名多领了几颗沙柳)

u_101 1000 100

u_088 900 400

u_103 500 …

2、蚂蚁森林低碳用户排名分析

问题:查询user_low_carbon表中每日流水记录,条件为:

用户在2017年,连续三天(或以上)的天数里,

每天减少碳排放(low_carbon)都超过100g的用户低碳流水。

需要查询返回满足以上条件的user_low_carbon表中的记录流水。

例如用户u_002符合条件的记录如下,因为2017/1/2~2017/1/5连续四天的碳排放量之和都大于等于100g:

seq(key) user_id data_dt low_carbon

xxxxx10 u_002 2017/1/2 150

xxxxx11 u_002 2017/1/2 70

xxxxx12 u_002 2017/1/3 30

xxxxx13 u_002 2017/1/3 80

xxxxx14 u_002 2017/1/4 150

xxxxx14 u_002 2017/1/5 101

备注:统计方法不限于sql、procedure、python,java等

提供的数据说明:

user_low_carbon:

u_001 2017/1/1 10

u_001 2017/1/2 150

u_001 2017/1/2 110

u_001 2017/1/2 10

u_001 2017/1/4 50

u_001 2017/1/4 10

u_001 2017/1/6 45

u_001 2017/1/6 90

u_002 2017/1/1 10

u_002 2017/1/2 150

u_002 2017/1/2 70

u_002 2017/1/3 30

u_002 2017/1/3 80

u_002 2017/1/4 150

u_002 2017/1/5 101

u_002 2017/1/6 68

plant_carbon:

p001 梭梭树 17

p002 沙柳 19

p003 樟子树 146

p004 胡杨 215

1.创建表

create table user_low_carbon(user_id String,data_dt String,low_carbon int) row format delimited fields terminated by ‘\t’;

create table plant_carbon(plant_id string,plant_name String,low_carbon int) row format delimited fields terminated by ‘\t’;

2.加载数据

load data local inpath “/opt/module/hive/datas/user_low_carbon.txt” into table user_low_carbon;

load data local inpath “/opt/module/hive/datas/plant_carbon.txt” into table plant_carbon;

3.设置本地模式

set hive.exec.mode.local.auto=true;

+--------------------------+--------------------------+-----------------------------+
| user_low_carbon.user_id  | user_low_carbon.data_dt  | user_low_carbon.low_carbon  |
+--------------------------+--------------------------+-----------------------------+
| u_001                    | 2017/1/1                 | 10                          |
| u_001                    | 2017/1/2                 | 150                         |
| u_001                    | 2017/1/2                 | 110                         |
| u_001                    | 2017/1/2                 | 10                          |
| u_001                    | 2017/1/4                 | 50                          |
| u_001                    | 2017/1/4                 | 10                          |
| u_001                    | 2017/1/6                 | 45                          |
| u_001                    | 2017/1/6                 | 90                          |
| u_002                    | 2017/1/1                 | 10                          |
| u_002                    | 2017/1/2                 | 150                         |
| u_002                    | 2017/1/2                 | 70                          |
| u_002                    | 2017/1/3                 | 30                          |
| u_002                    | 2017/1/3                 | 80                          |
| u_002                    | 2017/1/4                 | 150                         |
| u_002                    | 2017/1/5                 | 101                         |
| u_002                    | 2017/1/6                 | 68                          |
+--------------------------+--------------------------+-----------------------------+
+------------------------+--------------------------+--------------------------+
| plant_carbon.plant_id  | plant_carbon.plant_name  | plant_carbon.low_carbon  |
+------------------------+--------------------------+--------------------------+
| p001                   | 梭梭树                      | 17                       |
| p002                   | 沙柳                       | 19                       |
| p003                   | 樟子树                      | 146                      |
| p004                   | 胡杨                       | 215                      |
+------------------------+--------------------------+--------------------------+

– 需求一

1.蚂蚁森林植物申领统计
问题:假设2017年1月1日开始记录低碳数据(user_low_carbon),
假设2017年10月1日之前满足申领条件的用户都申领了一颗p004-胡杨,
剩余的能量全部用来领取“p002-沙柳” 。
统计在10月1日之前累计申领“p002-沙柳” 排名前10的用户信息;
以及他比后一名多领了几颗沙柳

方式一:

-- 1. 处理日期格式  2017/1/1 == > 2017-01-01 
SELECT 
   user_id ,
   DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
   low_carbon
FROM 
  user_low_carbon  ==>t1

-- 2. 求出2017年10月1日之前,每个用户的总能量
SELECT 
    t1.user_id,
    SUM(t1.low_carbon) sum_low_carbon
FROM 
   t1
WHERE t1.data_dt < '2017-10-01' 
GROUP BY t1.user_id  
ORDER BY sum_low_carbon DESC 
LIMIT 11   ==>t2

-- 组合
SELECT 
    t1.user_id,
    SUM(t1.low_carbon) sum_low_carbon
FROM 
   (SELECT 
   user_id ,
   DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
   low_carbon
FROM 
  user_low_carbon)t1
WHERE t1.data_dt < '2017-10-01' 
GROUP BY t1.user_id
ORDER BY sum_low_carbon DESC 
LIMIT 11

+-------------+-----------------+
| t1.user_id  | sum_low_carbon  |
+-------------+-----------------+
| u_007       | 1470            |
| u_013       | 1430            |
| u_008       | 1240            |
| u_005       | 1100            |
| u_010       | 1080            |
| u_014       | 1060            |
| u_011       | 960             |
| u_009       | 930             |
| u_006       | 830             |
| u_002       | 659             |
| u_004       | 640             |
+-------------+-----------------+

-- 3. 求出领取胡杨和沙柳需要的能量
SELECT low_carbon FROM plant_carbon WHERE plant_id = 'p004'  ==> t3
SELECT low_carbon FROM plant_carbon WHERE plant_id = 'p002'  ==> t4 

-- 4. 求出每个人领取的沙柳的数量

SELECT 
   t2.user_id ,
   t2.sum_low_carbon,
   FLOOR((t2.sum_low_carbon - t3.low_carbon) / t4.low_carbon) sl
FROM  
  t2 ,
  t3 , 
  t4    ==>t5

-- 组合
SELECT 
   t2.user_id ,
   t2.sum_low_carbon,
   FLOOR((t2.sum_low_carbon - t3.low_carbon) / t4.low_carbon) sl
FROM  
  (SELECT 
    t1.user_id,
    SUM(t1.low_carbon) sum_low_carbon
FROM 
   (SELECT 
   user_id ,
   DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
   low_carbon
FROM 
  user_low_carbon)t1
WHERE t1.data_dt < '2017-10-01' 
GROUP BY t1.user_id
ORDER BY sum_low_carbon DESC 
LIMIT 11)t2 ,
  (SELECT low_carbon FROM plant_carbon WHERE plant_id = 'p004')t3 , 
  (SELECT low_carbon FROM plant_carbon WHERE plant_id = 'p002')t4 

 +-------------+--------------------+-----+
| t2.user_id  | t2.sum_low_carbon  | sl  |
+-------------+--------------------+-----+
| u_007       | 1470               | 66  |
| u_013       | 1430               | 63  |
| u_008       | 1240               | 53  |
| u_005       | 1100               | 46  |
| u_010       | 1080               | 45  |
| u_014       | 1060               | 44  |
| u_011       | 960                | 39  |
| u_009       | 930                | 37  |
| u_006       | 830                | 32  |
| u_002       | 659                | 23  |
| u_004       | 640                | 22  |
+-------------+--------------------+-----+

-- 5. 将下一行的数据提取到当前行
SELECT 
   t5.user_id ,
   t5.sum_low_carbon,
   t5.sl,
   lead(t5.sl,1) over(ORDER BY t5.sl DESC ) next_sl
FROM
   t5 
LIMIT 10   ==>t6

-- 组合
SELECT 
   t5.user_id ,
   t5.sum_low_carbon,
   t5.sl,
   lead(t5.sl,1) over(ORDER BY t5.sl DESC ) next_sl
FROM
   (SELECT 
   t2.user_id ,
   t2.sum_low_carbon,
   FLOOR((t2.sum_low_carbon - t3.low_carbon) / t4.low_carbon) sl
FROM  
  (SELECT 
    t1.user_id,
    SUM(t1.low_carbon) sum_low_carbon
FROM 
   (SELECT 
   user_id ,
   DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
   low_carbon
FROM 
  user_low_carbon)t1
WHERE t1.data_dt < '2017-10-01' 
GROUP BY t1.user_id
ORDER BY sum_low_carbon DESC 
LIMIT 11)t2 ,
  (SELECT low_carbon FROM plant_carbon WHERE plant_id = 'p004')t3 , 
  (SELECT low_carbon FROM plant_carbon WHERE plant_id = 'p002')t4 )t5    
LIMIT 10

+-------------+--------------------+--------+----------+
| t5.user_id  | t5.sum_low_carbon  | t5.sl  | next_sl  |
+-------------+--------------------+--------+----------+
| u_007       | 1470               | 66     | 63       |
| u_013       | 1430               | 63     | 53       |
| u_008       | 1240               | 53     | 46       |
| u_005       | 1100               | 46     | 45       |
| u_010       | 1080               | 45     | 44       |
| u_014       | 1060               | 44     | 39       |
| u_011       | 960                | 39     | 37       |
| u_009       | 930                | 37     | 32       |
| u_006       | 830                | 32     | 23       |
| u_002       | 659                | 23     | 22       |
+-------------+--------------------+--------+----------+

-- 计算比下个人多领取多少颗沙柳

SELECT
    t6.user_id ,
    t6.sum_low_carbon,
    t6.sl,
    t6.next_sl,
    t6.sl-t6.next_sl sl_diff
FROM t6

-- 最终组合 :
 SELECT
    t6.user_id ,
    t6.sum_low_carbon,
    t6.sl,
    t6.next_sl,
    t6.sl-t6.next_sl sl_diff
FROM 
   (SELECT 
   t5.user_id ,
   t5.sum_low_carbon,
   t5.sl,
   lead(t5.sl,1) over(ORDER BY t5.sl DESC ) next_sl
FROM
   (SELECT 
   t2.user_id ,
   t2.sum_low_carbon,
   FLOOR((t2.sum_low_carbon - t3.low_carbon) / t4.low_carbon) sl
FROM  
  (SELECT 
    t1.user_id,
    SUM(t1.low_carbon) sum_low_carbon
FROM 
   (SELECT 
   user_id ,
   DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
   low_carbon
FROM 
  user_low_carbon)t1
WHERE t1.data_dt < '2017-10-01' 
GROUP BY t1.user_id
ORDER BY sum_low_carbon DESC 
LIMIT 11)t2 ,
  (SELECT low_carbon FROM plant_carbon WHERE plant_id = 'p004')t3 , 
  (SELECT low_carbon FROM plant_carbon WHERE plant_id = 'p002')t4 )t5    
LIMIT 10)t6


+-------------+--------------------+--------+-------------+----------+
| t6.user_id  | t6.sum_low_carbon  | t6.sl  | t6.next_sl  | sl_diff  |
+-------------+--------------------+--------+-------------+----------+
| u_007       | 1470               | 66     | 63          | 3        |
| u_013       | 1430               | 63     | 53          | 10       |
| u_008       | 1240               | 53     | 46          | 7        |
| u_005       | 1100               | 46     | 45          | 1        |
| u_010       | 1080               | 45     | 44          | 1        |
| u_014       | 1060               | 44     | 39          | 5        |
| u_011       | 960                | 39     | 37          | 2        |
| u_009       | 930                | 37     | 32          | 5        |
| u_006       | 830                | 32     | 23          | 9        |
| u_002       | 659                | 23     | 22          | 1        |
+-------------+--------------------+--------+-------------+----------+

方式二:

select user_id,sum(low_carbon)
from user_low_carbon
group by user_id;

+----------+-------+
| user_id  |  _c1  |
+----------+-------+
| u_001    | 475   |
| u_002    | 659   |
| u_003    | 620   |
| u_004    | 640   |
| u_005    | 1100  |
| u_006    | 830   |
| u_007    | 1470  |
| u_008    | 1240  |
| u_009    | 930   |
| u_010    | 1080  |
| u_011    | 960   |
| u_012    | 250   |
| u_013    | 1430  |
| u_014    | 1060  |
| u_015    | 290   |
+----------+-------+
select low_carbon p04
from plant_carbon
where plant_id='p004'

select user_id,
sum(low_carbon)-(select low_carbon from plant_carbon where plant_id='p004') s_l
from user_low_carbon
group by user_id

+----------+-------+
| user_id  |  s_l  |
+----------+-------+
| u_001    | 260   |
| u_015    | 75    |
| u_014    | 845   |
| u_013    | 1215  |
| u_012    | 35    |
| u_011    | 745   |
| u_010    | 865   |
| u_009    | 715   |
| u_008    | 1025  |
| u_007    | 1255  |
| u_006    | 615   |
| u_005    | 885   |
| u_004    | 425   |
| u_003    | 405   |
| u_002    | 444   |
+----------+-------+


--1
select distinct user_id,
floor((sum(low_carbon) over(partition by user_id )-(select low_carbon from plant_carbon where plant_id='p004'))/
(select low_carbon p02 from plant_carbon where plant_id='p002' ) )plant_count
from user_low_carbon
order by plant_count desc

--2
select user_id,floor((t-(select low_carbon from plant_carbon where plant_id='p004'))/
(select low_carbon p02 from plant_carbon where plant_id='p002' ) )plant_count
from(
select distinct user_id,
sum(low_carbon) over(partition by user_id ) t
from user_low_carbon
) tmp_1
order by plant_count desc

--3
select user_id,
floor((sum(low_carbon)-(select low_carbon from plant_carbon where plant_id='p004'))/
(select low_carbon p02 from plant_carbon where plant_id='p002' )) plant_count
from user_low_carbon
group by user_id
order by plant_count desc


+----------+--------------+
| user_id  | plant_count  |
+----------+--------------+
| u_007    | 66           |
| u_013    | 63           |
| u_008    | 53           |
| u_005    | 46           |
| u_010    | 45           |
| u_014    | 44           |
| u_011    | 39           |
| u_009    | 37           |
| u_006    | 32           |
| u_002    | 23           |
| u_004    | 22           |
| u_003    | 21           |
| u_001    | 13           |
| u_015    | 3            |
| u_012    | 1            |
+----------+--------------+


select user_id,plant_count,
plant_count-(lead(plant_count,1) over(order by plant_count desc)) less_count
from(
select distinct user_id,
floor((sum(low_carbon) over(partition by user_id )-(select low_carbon from plant_carbon where plant_id='p004'))/
(select low_carbon p02 from plant_carbon where plant_id='p002' ) )plant_count
from user_low_carbon
) tmp

+----------+--------------+-------------+
| user_id  | plant_count  | less_count  |
+----------+--------------+-------------+
| u_007    | 66           | 3           |
| u_013    | 63           | 10          |
| u_008    | 53           | 7           |
| u_005    | 46           | 1           |
| u_010    | 45           | 1           |
| u_014    | 44           | 5           |
| u_011    | 39           | 2           |
| u_009    | 37           | 5           |
| u_006    | 32           | 9           |
| u_002    | 23           | 1           |
| u_004    | 22           | 1           |
| u_003    | 21           | 8           |
| u_001    | 13           | 10          |
| u_015    | 3            | 2           |
| u_012    | 1            | NULL        |
+----------+--------------+-------------+
select user_id,plant_count,
plant_count-(lead(plant_count,1) over(order by plant_count desc)) less_count
from(
select distinct user_id,
floor((sum(low_carbon) over(partition by user_id )-(select low_carbon from plant_carbon where plant_id='p004'))/
(select low_carbon p02 from plant_carbon where plant_id='p002' ) )plant_count
from user_low_carbon
) tmp
limit 10;
+----------+--------------+-------------+
| user_id  | plant_count  | less_count  |
+----------+--------------+-------------+
| u_007    | 66           | 3           |
| u_013    | 63           | 10          |
| u_008    | 53           | 7           |
| u_005    | 46           | 1           |
| u_010    | 45           | 1           |
| u_014    | 44           | 5           |
| u_011    | 39           | 2           |
| u_009    | 37           | 5           |
| u_006    | 32           | 9           |
| u_002    | 23           | 1           |
+----------+--------------+-------------+
10 rows selected (13.457 seconds)

整理:

select user_id,date_format(regexp_replace(data_dt,'/','-'),"yyyy-MM-dd") dt,low_carbon
from user_low_carbon

select user_id,sum(low_carbon) low_carbon
from (
select user_id,date_format(regexp_replace(data_dt,'/','-'),"yyyy-MM-dd") dt,low_carbon
from user_low_carbon
)t1
where dt <'2017-10-01'
group by user_id
+----------+-------------+
| user_id  | low_carbon  |
+----------+-------------+
| u_001    | 475         |
| u_002    | 659         |
| u_003    | 620         |
| u_004    | 640         |
| u_005    | 1100        |
| u_006    | 830         |
| u_007    | 1470        |
| u_008    | 1240        |
| u_009    | 930         |
| u_010    | 1080        |
| u_011    | 960         |
| u_012    | 250         |
| u_013    | 1430        |
| u_014    | 1060        |
| u_015    | 290         |
+----------+-------------+
select user_id,
floor((sum(low_carbon)-(select low_carbon from plant_carbon where plant_id='p004'))/
(select low_carbon from plant_carbon where plant_id='p002')) sl
from (
select user_id,date_format(regexp_replace(data_dt,'/','-'),"yyyy-MM-dd") dt,low_carbon
from user_low_carbon
)t1
where dt <'2017-10-01'
group by user_id
order by sl desc
limit 11
+----------+-----+
| user_id  | sl  |
+----------+-----+
| u_007    | 66  |
| u_013    | 63  |
| u_008    | 53  |
| u_005    | 46  |
| u_010    | 45  |
| u_014    | 44  |
| u_011    | 39  |
| u_009    | 37  |
| u_006    | 32  |
| u_002    | 23  |
| u_004    | 22  |
+----------+-----+
select user_id,sl,
(sl-lead(sl,1) over(order by sl desc))gap
from(
select user_id,
floor((sum(low_carbon)-(select low_carbon from plant_carbon where plant_id='p004'))/
(select low_carbon from plant_carbon where plant_id='p002')) sl
from (
select user_id,date_format(regexp_replace(data_dt,'/','-'),"yyyy-MM-dd") dt,low_carbon
from user_low_carbon
)t1
where dt <'2017-10-01'
group by user_id
order by sl desc
limit 11
)t2
limit 10
+----------+-----+------+
| user_id  | sl  | gap  |
+----------+-----+------+
| u_007    | 66  | 3    |
| u_013    | 63  | 10   |
| u_008    | 53  | 7    |
| u_005    | 46  | 1    |
| u_010    | 45  | 1    |
| u_014    | 44  | 5    |
| u_011    | 39  | 2    |
| u_009    | 37  | 5    |
| u_006    | 32  | 9    |
| u_002    | 23  | 1    |
+----------+-----+------+
10 rows selected (14.748 seconds)

– 需求二

2、蚂蚁森林低碳用户排名分析
问题:查询user_low_carbon表中每日流水记录,条件为:
用户在2017年,连续三天(或以上)的天数里,
每天减少碳排放(low_carbon)都超过100g的用户低碳流水。
需要查询返回满足以上条件的user_low_carbon表中的记录流水。

select user_id,regexp_replace(data_dt,"/","-") data_dt,sum(low_carbon) s
from user_low_carbon 
group by user_id,data_dt
having s>=100
+----------+-----------+------+
| user_id  |  data_dt  |  s   |
+----------+-----------+------+
| u_001    | 2017-1-2  | 270  |
| u_001    | 2017-1-6  | 135  |
| u_002    | 2017-1-2  | 220  |
| u_002    | 2017-1-3  | 110  |
| u_002    | 2017-1-4  | 150  |
| u_002    | 2017-1-5  | 101  |
| u_003    | 2017-1-2  | 160  |
| u_003    | 2017-1-3  | 160  |
| u_003    | 2017-1-5  | 120  |
| u_003    | 2017-1-7  | 120  |
| u_004    | 2017-1-1  | 110  |
| u_004    | 2017-1-3  | 120  |
| u_004    | 2017-1-6  | 120  |
| u_004    | 2017-1-7  | 130  |
| u_005    | 2017-1-2  | 130  |
| u_005    | 2017-1-3  | 180  |
| u_005    | 2017-1-4  | 190  |
| u_005    | 2017-1-6  | 280  |
| u_005    | 2017-1-7  | 160  |
| u_006    | 2017-1-2  | 180  |
| u_006    | 2017-1-3  | 220  |
| u_006    | 2017-1-7  | 290  |
| u_007    | 2017-1-1  | 130  |
| u_007    | 2017-1-2  | 360  |
| u_007    | 2017-1-4  | 530  |
| u_007    | 2017-1-6  | 230  |
| u_007    | 2017-1-7  | 160  |
| u_008    | 2017-1-1  | 160  |
| u_008    | 2017-1-2  | 120  |
| u_008    | 2017-1-4  | 260  |
| u_008    | 2017-1-5  | 360  |
| u_008    | 2017-1-6  | 160  |
| u_008    | 2017-1-7  | 120  |
| u_009    | 2017-1-2  | 140  |
| u_009    | 2017-1-3  | 170  |
| u_009    | 2017-1-4  | 270  |
| u_009    | 2017-1-7  | 140  |
| u_010    | 2017-1-2  | 180  |
| u_010    | 2017-1-4  | 170  |
| u_010    | 2017-1-5  | 180  |
| u_010    | 2017-1-6  | 190  |
| u_010    | 2017-1-7  | 180  |
| u_011    | 2017-1-1  | 110  |
| u_011    | 2017-1-2  | 200  |
| u_011    | 2017-1-3  | 120  |
| u_011    | 2017-1-4  | 100  |
| u_011    | 2017-1-5  | 100  |
| u_011    | 2017-1-6  | 100  |
| u_011    | 2017-1-7  | 230  |
| u_012    | 2017-1-2  | 130  |
| u_013    | 2017-1-2  | 200  |
| u_013    | 2017-1-3  | 150  |
| u_013    | 2017-1-4  | 550  |
| u_013    | 2017-1-5  | 350  |
| u_014    | 2017-1-1  | 220  |
| u_014    | 2017-1-2  | 140  |
| u_014    | 2017-1-5  | 250  |
| u_014    | 2017-1-6  | 120  |
| u_014    | 2017-1-7  | 290  |
| u_015    | 2017-1-7  | 140  |
+----------+-----------+------+


select user_id,data_dt, c
from(
select user_id,data_dt,
count(s) over (partition by user_id ) c
from (
select user_id,regexp_replace(data_dt,"/","-") data_dt,sum(low_carbon) s
from user_low_carbon 
group by user_id,data_dt
having s>=100
) tmp_1
)tmp_2
where c>=3

+----------+-----------+----+
| user_id  |  data_dt  | c  |
+----------+-----------+----+
| u_002    | 2017-1-2  | 4  |
| u_002    | 2017-1-3  | 4  |
| u_002    | 2017-1-4  | 4  |
| u_002    | 2017-1-5  | 4  |
| u_003    | 2017-1-2  | 4  |
| u_003    | 2017-1-3  | 4  |
| u_003    | 2017-1-5  | 4  |
| u_003    | 2017-1-7  | 4  |
| u_004    | 2017-1-1  | 4  |
| u_004    | 2017-1-3  | 4  |
| u_004    | 2017-1-6  | 4  |
| u_004    | 2017-1-7  | 4  |
| u_005    | 2017-1-2  | 5  |
| u_005    | 2017-1-3  | 5  |
| u_005    | 2017-1-4  | 5  |
| u_005    | 2017-1-6  | 5  |
| u_005    | 2017-1-7  | 5  |
| u_006    | 2017-1-2  | 3  |
| u_006    | 2017-1-3  | 3  |
| u_006    | 2017-1-7  | 3  |
| u_007    | 2017-1-1  | 5  |
| u_007    | 2017-1-2  | 5  |
| u_007    | 2017-1-4  | 5  |
| u_007    | 2017-1-6  | 5  |
| u_007    | 2017-1-7  | 5  |
| u_008    | 2017-1-1  | 6  |
| u_008    | 2017-1-2  | 6  |
| u_008    | 2017-1-4  | 6  |
| u_008    | 2017-1-5  | 6  |
| u_008    | 2017-1-6  | 6  |
| u_008    | 2017-1-7  | 6  |
| u_009    | 2017-1-2  | 4  |
| u_009    | 2017-1-3  | 4  |
| u_009    | 2017-1-4  | 4  |
| u_009    | 2017-1-7  | 4  |
| u_010    | 2017-1-2  | 5  |
| u_010    | 2017-1-4  | 5  |
| u_010    | 2017-1-5  | 5  |
| u_010    | 2017-1-6  | 5  |
| u_010    | 2017-1-7  | 5  |
| u_011    | 2017-1-1  | 7  |
| u_011    | 2017-1-2  | 7  |
| u_011    | 2017-1-3  | 7  |
| u_011    | 2017-1-4  | 7  |
| u_011    | 2017-1-5  | 7  |
| u_011    | 2017-1-6  | 7  |
| u_011    | 2017-1-7  | 7  |
| u_013    | 2017-1-2  | 4  |
| u_013    | 2017-1-3  | 4  |
| u_013    | 2017-1-4  | 4  |
| u_013    | 2017-1-5  | 4  |
| u_014    | 2017-1-1  | 5  |
| u_014    | 2017-1-2  | 5  |
| u_014    | 2017-1-5  | 5  |
| u_014    | 2017-1-6  | 5  |
| u_014    | 2017-1-7  | 5  |
+----------+-----------+----+

select user_id,data_dt, c,
lead(data_dt,3) over(partition by user_id order by data_dt) ld
from(
select user_id,data_dt,
count(s) over (partition by user_id ) c
from (
select user_id,regexp_replace(data_dt,"/","-") data_dt,sum(low_carbon) s
from user_low_carbon 
group by user_id,data_dt
having s>=100
) tmp_1
)tmp_2
where c>=3 
+----------+-----------+----+-----------+
| user_id  |  data_dt  | c  |    ld     |
+----------+-----------+----+-----------+
| u_002    | 2017-1-2  | 4  | 2017-1-5  |
| u_002    | 2017-1-3  | 4  | NULL      |
| u_002    | 2017-1-4  | 4  | NULL      |
| u_002    | 2017-1-5  | 4  | NULL      |
| u_003    | 2017-1-2  | 4  | 2017-1-7  |
| u_003    | 2017-1-3  | 4  | NULL      |
| u_003    | 2017-1-5  | 4  | NULL      |
| u_003    | 2017-1-7  | 4  | NULL      |
| u_004    | 2017-1-1  | 4  | 2017-1-7  |
| u_004    | 2017-1-3  | 4  | NULL      |
| u_004    | 2017-1-6  | 4  | NULL      |
| u_004    | 2017-1-7  | 4  | NULL      |
| u_005    | 2017-1-2  | 5  | 2017-1-6  |
| u_005    | 2017-1-3  | 5  | 2017-1-7  |
| u_005    | 2017-1-4  | 5  | NULL      |
| u_005    | 2017-1-6  | 5  | NULL      |
| u_005    | 2017-1-7  | 5  | NULL      |
| u_006    | 2017-1-2  | 3  | NULL      |
| u_006    | 2017-1-3  | 3  | NULL      |
| u_006    | 2017-1-7  | 3  | NULL      |
| u_007    | 2017-1-1  | 5  | 2017-1-6  |
| u_007    | 2017-1-2  | 5  | 2017-1-7  |
| u_007    | 2017-1-4  | 5  | NULL      |
| u_007    | 2017-1-6  | 5  | NULL      |
| u_007    | 2017-1-7  | 5  | NULL      |
| u_008    | 2017-1-1  | 6  | 2017-1-5  |
| u_008    | 2017-1-2  | 6  | 2017-1-6  |
| u_008    | 2017-1-4  | 6  | 2017-1-7  |
| u_008    | 2017-1-5  | 6  | NULL      |
| u_008    | 2017-1-6  | 6  | NULL      |
| u_008    | 2017-1-7  | 6  | NULL      |
| u_009    | 2017-1-2  | 4  | 2017-1-7  |
| u_009    | 2017-1-3  | 4  | NULL      |
| u_009    | 2017-1-4  | 4  | NULL      |
| u_009    | 2017-1-7  | 4  | NULL      |
| u_010    | 2017-1-2  | 5  | 2017-1-6  |
| u_010    | 2017-1-4  | 5  | 2017-1-7  |
| u_010    | 2017-1-5  | 5  | NULL      |
| u_010    | 2017-1-6  | 5  | NULL      |
| u_010    | 2017-1-7  | 5  | NULL      |
| u_011    | 2017-1-1  | 7  | 2017-1-4  |
| u_011    | 2017-1-2  | 7  | 2017-1-5  |
| u_011    | 2017-1-3  | 7  | 2017-1-6  |
| u_011    | 2017-1-4  | 7  | 2017-1-7  |
| u_011    | 2017-1-5  | 7  | NULL      |
| u_011    | 2017-1-6  | 7  | NULL      |
| u_011    | 2017-1-7  | 7  | NULL      |
| u_013    | 2017-1-2  | 4  | 2017-1-5  |
| u_013    | 2017-1-3  | 4  | NULL      |
| u_013    | 2017-1-4  | 4  | NULL      |
| u_013    | 2017-1-5  | 4  | NULL      |
| u_014    | 2017-1-1  | 5  | 2017-1-6  |
| u_014    | 2017-1-2  | 5  | 2017-1-7  |
| u_014    | 2017-1-5  | 5  | NULL      |
| u_014    | 2017-1-6  | 5  | NULL      |
| u_014    | 2017-1-7  | 5  | NULL      |
+----------+-----------+----+-----------+
select distinct user_id
from(
select user_id,data_dt, c,
datediff(lead(data_dt,3) over(partition by user_id order by data_dt) ,data_dt) gap
from(
select user_id,data_dt,
count(s) over (partition by user_id ) c
from (
select user_id,regexp_replace(data_dt,"/","-") data_dt,sum(low_carbon) s
from user_low_carbon 
group by user_id,data_dt
having s>=100
) tmp_1
)tmp_2
where c>=3 
)tmp_3
where gap=3
+----------+
| user_id  |
+----------+
| u_002    |
| u_008    |
| u_010    |
| u_011    |
| u_013    |
+----------+

select user_id,data_dt,low_carbon
from user_low_carbon
where user_id in
(
select distinct user_id
from(
select user_id,data_dt, c,
datediff(lead(data_dt,3) over(partition by user_id order by data_dt) ,data_dt) gap
from(
select user_id,data_dt,
count(s) over (partition by user_id ) c
from (
select user_id,regexp_replace(data_dt,"/","-") data_dt,sum(low_carbon) s
from user_low_carbon 
group by user_id,data_dt
having s>=100
) tmp_1
)tmp_2
where c>=3 
)tmp_3
where gap=3
)
order by user_id,data_dt;
+----------+-----------+-------------+
| u_002    | 2017/1/1  | 10          |
| u_002    | 2017/1/2  | 70          |
| u_002    | 2017/1/2  | 150         |
| u_002    | 2017/1/3  | 80          |
| u_002    | 2017/1/3  | 30          |
| u_002    | 2017/1/4  | 150         |
| u_002    | 2017/1/5  | 101         |
| u_002    | 2017/1/6  | 68          |
| u_008    | 2017/1/1  | 160         |
| u_008    | 2017/1/2  | 60          |
| u_008    | 2017/1/2  | 60          |
| u_008    | 2017/1/3  | 60          |
| u_008    | 2017/1/4  | 260         |
| u_008    | 2017/1/5  | 360         |
| u_008    | 2017/1/6  | 160         |
| u_008    | 2017/1/7  | 60          |
| u_008    | 2017/1/7  | 60          |
| u_010    | 2017/1/1  | 90          |
| u_010    | 2017/1/2  | 90          |
| u_010    | 2017/1/2  | 90          |
| u_010    | 2017/1/3  | 90          |
| u_010    | 2017/1/4  | 90          |
| u_010    | 2017/1/4  | 80          |
| u_010    | 2017/1/5  | 90          |
| u_010    | 2017/1/5  | 90          |
| u_010    | 2017/1/6  | 190         |
| u_010    | 2017/1/7  | 90          |
| u_010    | 2017/1/7  | 90          |
| u_011    | 2017/1/1  | 110         |
| u_011    | 2017/1/2  | 100         |
| u_011    | 2017/1/2  | 100         |
| u_011    | 2017/1/3  | 120         |
| u_011    | 2017/1/4  | 100         |
| u_011    | 2017/1/5  | 100         |
| u_011    | 2017/1/6  | 100         |
| u_011    | 2017/1/7  | 100         |
| u_011    | 2017/1/7  | 130         |
| u_013    | 2017/1/1  | 50          |
| u_013    | 2017/1/2  | 50          |
| u_013    | 2017/1/2  | 150         |
| u_013    | 2017/1/3  | 150         |
| u_013    | 2017/1/4  | 550         |
| u_013    | 2017/1/5  | 350         |
| u_013    | 2017/1/6  | 50          |
| u_013    | 2017/1/7  | 20          |
| u_013    | 2017/1/7  | 60          |
+----------+-----------+-------------+
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值