-- 数据准备(这里应该是分区表,每个分区存放当天活跃用户id,为图方便放在了一个表里)
-- 创建hive表
-- Table of active users: each row pairs a user id with a `ds` value
-- (presumably the day on which the user was active -- confirm against the data).
-- Kept as a single unpartitioned table for convenience; a production layout
-- would normally partition by day instead of storing the day as a column.
-- Storage: plain text files, one record per line, fields separated by a tab.
CREATE TABLE `active` (
    `id` string,
    `ds` string
)
ROW FORMAT SERDE
    'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
    'field.delim' = '\t'
)
STORED AS
    INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
-- 加载数据
-- Load the file ./active.dat from the client's local file system into `active`.
LOAD DATA LOCAL INPATH './active.dat'
INTO TABLE `active`;
-- 设计留存表
CREATE TABLE `user_left_info_day`(
`ds` string,
`liucun_map` map<string,string>)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.