首先看原始数据
需要把这三列转换成数字格式
其次city 字段不可是如下值:
‘其他城市’,’点评实验室’
最后,经纬度,不可以未NULL ,空字符串,以及0
顺带,表结构如下,以dt 即日期为分区。
o了,表结构有了,我们直接上Demo 代码。
欢迎留言哦。
insert overwrite table dimension.ac_wuba_life partition(dt='2018-04-13')
select shop_name,
cate,
tag_flag,
city,
hot_num,
active_num,
regexp_replace(service_grade, " ↑", "") as service_grade,
address,
map_lat,
map_lng,
shop_url
from(SELECT
shop_name,
cate,
tag_flag,
city,
CASE
WHEN hot_num = '' then 0
ELSE regexp_replace(hot_num, " ↑", "")
END as hot_num,
CASE
WHEN active_num = '' then 0
ELSE regexp_replace(active_num, " ↑", "")
END as active_num,
CASE
WHEN service_grade = '' then 0
ELSE regexp_replace(service_grade, " ↓", "")
END as service_grade,
address,
map_lat,
map_lng,
shop_url
FROM dimension.ac_wuba_life
WHERE map_lat IS NOT NULL
AND map_lng IS NOT NULL
AND map_lat != 0
AND map_lng != 0
AND map_lat != ''
AND map_lng != ''
AND city != '其他城市'
AND city != '点评实验室' ) b;