SELECT
from_unixtime(paytime,'yyyy-MM-dd hh:mm:ss')
FROM user_trade
WHERE dt='2019-04-09';
拓展:把日期转换为时间戳
unix_timestamp
SELECT
unix_timestamp(firstactivetime)
FROM user_info
WHERE level=5;
2. 如何计算日期间隔
语法
datediff(string enddate, string startdate)
举例:用户首次激活时间,与2019年5月1日的日期间隔
SELECT user_name,
datediff('2019-05-01', to_date(firstactivetime))
FROM user_info
LIMIT 10;
拓展:日期增加,减少函数
语法
date_add(string startdate, int days)
date_sub (string startdate, int days)
举例
SELECT user_name,
date_add(firstactivetime,7)
FROM user_info
LIMIT 10;
3. case when 条件函数
统计以下4个年龄段20岁以下,20-30岁,30-40岁,40岁以上的用户分布
SELECT case when age<20 then '20岁以下'
when age<30 then '20-30岁'
when age<40 then '30-40岁'
else '40岁以上' end,
count(user_id) as user_num
FROM user_info
GROUP BY case when age<20 then '20岁以下'
when age<30 then '20-30岁'
when age<40 then '30-40岁'
else '40岁以上' end;
4. if函数
统计每个性别用户等级高低的分布情况
SELECT sex,
if(level>5,'高','低'),
count(user_id) as user_num
FROM user_info
GROUP BY sex,
if(level>5,'高','低');
5. 字符串函数
语法
substr(string A, int start, int len)
每个月新激活用户数
SELECT substr(firstactivetime,1,7) as month,
count(user_id) as user_num
FROM user_info
GROUP BY substr(firstactivetime,1,7);
语法
get_json_object(string json_string, string path)
不同手机品牌的用户数
SELECT get_json_object(extra1,'$.phonebrand') as phone_brand,
count(user_id) as user_num
FROM user_info
GROUP BY get_json_object(extra1,'$.phonebrand');
不同手机品牌学历分布用户数
SELECT extra2['phonebrand'] as phone_brand,
extra2['education'] as education,
count(user_id) as user_num
FROM user_info
GROUP BY extra2['phonebrand'],extra2['education'];
6. 聚合统计函数
ELLA用户2018年平均支付金额,以及2018年最大支付日期和最小支付日期的时间间隔
SELECT avg(pay_amount) as avg_amount,
datediff(max(from_unixtime(pay_time,'yy-MM-dd')),
min(from_unixtime(pay_time,'yy-MM-dd')))
FROM user_trade
WHERE year(dt)='2018'
and user_name='ELLA';
# 另一种写法
SELECT avg(pay_amount) as avg_amount,
datediff(from_unixtime(max(pay_time),'yy-MM-dd'),
from_unixtime(min(pay_time),'yy-MM-dd'))
FROM user_trade
WHERE year(dt)='2018'
and user_name='ELLA';
7. 综合练习
2018年购买的商品品类在两个以上的用户数
SELECT count(a.user_name)
FROM (SELECT user_name,
count(distinct goods_category) as category_num
FROM user_trade
WHERE year(dt)='2018'
GROUP BY user_name HAVING count(distinct goods_category) > 2) as a;
用户激活时间在2018年,年龄段在20-30岁和30-40岁的婚姻状况分布
SELECT a.age_type,
if(marriage_status=1,'已婚','未婚'),
count(a.user_id)
FROM
(SELECT case when age<20 then '20岁以下'
when age<30 then '20-30岁'
when age<40 then '30-40岁'
else '40岁以上' end as age_type,
get_json_object(extra1, '$.marriage_status') as marriage_status,
user_id
FROM user_info
WHERE year(firstactivetime)='2018') as a
WHERE a.age_type in ('20-30岁', '30-40岁')
GROUP BY a.age_type,
if(marriage_status=1,'已婚','未婚');
激活天数距今超过300天的男女分布情况
SELECT sex,
count(user_id)
FROM user_info
WHERE datediff('2019-08-07', substr(firstactivetime,1,10))>300
GROUP BY sex;
不同性别,教育程度的分布情况
SELECT sex,
get_json_object(extra1,'$.education'),
count(user_id)
FROM user_info
GROUP BY sex,
get_json_object(extra1,'$.education');
2019年1月1日到2019年4月30日,每个时段的不同品类购买金额分布
select substr(from_unixtime(pay_time,'yyyy-MM-dd hh'),12),
goods_category,
sum(pay_amount)
from user_trade
where dt between '2019-01-01' and '2019-04-30'
group by substr(from_unixtime(pay_time,'yyyy-MM-dd hh'),12),
goods_category;