- 列拆分成行
把某含有 ‘1,2,3,4’ 的列拆分为4行
select * from (select * from
(select *,'1,2,3,4' c from baoxian_olap.fin_bx_dim_product_class_info) t1
LATERAL VIEW explode(split(c,',')) info AS t2
)
2. 建表带分区
CREATE TABLE IF NOT EXISTS prd_bx_short_order_revenue_cps_xb
(
order_no STRING,
premium_year DOUBLE,
product_name STRING,
product_code STRING,
order_date STRING COMMENT '订单日',
)
COMMENT '保险 短险收入 全量表'
PARTITIONED BY
(
pday STRING COMMENT '统计日'
);
- 限制分组人数超过阈值 Having
GROUP BY a.period
,b.period
,a.premium_bin
HAVING COUNT(a.order_no) > 30
- 向上取整
,ceil(premium_renewal/10) AS premium_bin2
- 计算时间差
,DATEDIFF(getdate(),deduct_begin)-1 AS deduct_d
6.取小数点位数round
,round(sum(premium_t25)/sum(premium_renewal_t25),2)
- 两个表拼接
union all
- 字符串截取
substr(DATEADD(deduct_begin,1,'mm'),1,10)
不确定截取长度时结合length字段
,substr(insure_period,1,LENGTH(insure_period)-1) as insure_period
9.生成一个常变量
,'产品级' level
- 删除分区,插入分区
alter table prd_bx_ltv_rate_short_6m_pd drop if exists partition(pday='${yesterday}');
INSERT INTO prd_bx_ltv_rate_short_6m_pd PARTITION(pday='${yesterday}')
10.分区排序,内排序,外排序
select *,row_number() over (partition by order_no order by open_id desc) num
- odps设置分区全显示
set odps.sql.type.system.odps2=true;
set odps.sql.allow.fullscan=true;
set odps.sql.python.version=cp37;
- groupby时count(sum)
, count(case when datediff(g.create_time, to_date(dot.ct, 'yyyy-mm-dd hh:mi:ss'), 'day')=0 then dot.ch end) as daily_pv
, count(distinct case when datediff(g.create_time, to_date(dot.ct, 'yyyy-mm-dd hh:mi:ss'), 'day')=0 then dot.ch end) as daily_ch
, count(dot.ch) as week_pv
, count(distinct dot.ch) as week_ch
- where语句中date比较
and datediff(g.create_time, to_date(dot.ct, 'yyyy-mm-dd hh:mi:ss'), 'day') between 0 and 7
- groupby 常用函数
,max(dot_net_work) AS dot_net_work_max
,min(dot_net_work) AS dot_net_work_min
,COUNT(DISTINCT dot_net_work) AS dot_net_work_cnt
,median(dot_net_speed) AS dot_net_speed_median
,AVG(dot_net_speed) AS dot_net_speed_avg
,sum(split(split(ps,'_')[1],'s')[0]) as dot_load_time_sum
,MEDIAN(split(split(ps,'_')[1],'s')[0])
,DATEDIFF(MAX(ct),min(ct),'ss') as dot_page_time_diff
15.缺失值填充
,coalesce(sum(case when period is null then bxser_amts end), 0) as bs_service_amount
,round(coalesce(p2_rate,0),4) p2_rate
16.生成临时表 with, 后面必须select * from
with real_premium_table as
(
select order_no
,sum(case when period is not null then bxrene_amts + first_amts end ) as bs_rene_amount
,coalesce(sum(case when period is null then bxser_amts end), 0) as bs_service_amount
from baoxian_olap.fin_bx_dwb_orderamount_df
where pday= SUBSTR(DATEADD(GETDATE(), -1, 'dd'), 1, 10)
and order_date>='2021-01-01'
GROUP by order_no
)
--取续期收入
, renewal_plan_table as
(
SELECT order_no
,sum(case when period = '2' then renewal_fact_premium else 0 end) M2_real_premium
,sum(case when (GETDATE() between period_start and period_end) and (DATEDIFF(GETDATE(), create_time, 'mm') = (int(period)-1)) then renewal_premium end) as deducted_premium
FROM baoxian_olap.fin_bx_dwb_order_renewal_plan_d
WHERE pday >= '2021-01-01'
AND period_type = '短险'
AND is_gift != 1
group by order_no
)
17.concat拼接字符串
,concat(
'{"'
,is_appreciation
,'":"'
,concat_ws(',',collect_set(properties_code))
,'"}'
) protect_id
18.随机抽取数据 rand
select *
from table
order by RAND()
limit 50000;