从一张已有的Hive Table中创建新表及Partition时出现如下问题:
- 原有Hive Table中有160 GB数据(为三个月中所有应用和服务器的用户访问记录)
- 新表选取需要字段,并按照应用/服务器Ip/访问时间创建Partition
-
-- Create the table.
-- Dynamic partitioning is switched on, and nonstrict mode is used because the
-- INSERT below supplies no static value for any of the partition columns.
-- NOTE(review): partitioning by app / server IP / date over three months of
-- data may exceed the hive.exec.max.dynamic.partitions* limits -- confirm
-- against the cluster configuration.
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;

CREATE TABLE IF NOT EXISTS app_trace (
    trace_id       STRING,
    client_ip      STRING,
    user_device    STRING,
    user_id        STRING,
    user_account   STRING,
    org_id         STRING,
    org_name       STRING,
    org_path       STRING,
    org_parent_id  STRING,
    url            STRING,
    completed      BOOLEAN,
    cost           INT,
    create_time    BIGINT,
    parameters     MAP<STRING,STRING>,
    subtrace       ARRAY<STRING>
)
PARTITIONED BY (app_id INT, server_ip STRING, create_date STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\|'
    COLLECTION ITEMS TERMINATED BY '\$'
    MAP KEYS TERMINATED BY '\:'
STORED AS SEQUENCEFILE;

-- Load the data.
-- The partition spec must name the table's partition columns exactly
-- (app_id, server_ip, create_date); the dynamic partition values are taken
-- from the trailing columns of the SELECT list, in that order.
INSERT OVERWRITE TABLE app_trace PARTITION (app_id, server_ip, create_date)
SELECT
    trace_id,
    client_ip,
    user_device,
    user_id,
    user_account,
    org_id,
    org_name,
    org_path,
    org_parent_id,
    url,
    completed,