第一部分: 设置表的数据存储目录
# Create the HDFS directory that will hold the table's data files; -p also creates any missing parent directories.
bin/hdfs dfs -mkdir -p /user/yhq/lianjia_2nd_house;
第二部分:创建外部表,分区表
-- External, two-level partitioned Hive table over comma-delimited text files.
-- EXTERNAL: Hive only tracks metadata; dropping the table leaves the files
-- under LOCATION in place.
-- Every data column is declared STRING, so no type conversion happens on read.
-- IF NOT EXISTS makes the statement safe to re-run.
CREATE EXTERNAL TABLE IF NOT EXISTS db_lianjia.lianjia_2nd_house (
    name         STRING,
    house_type   STRING,
    house_area   STRING,
    region       STRING,
    floor_str    STRING,
    direction    STRING,
    total_price  STRING,
    square_price STRING,
    build_date   STRING
)
-- Partition directories are laid out as month_str=<value>/city_str=<value>.
PARTITIONED BY (
    month_str STRING COMMENT 'first partition Month',
    city_str  STRING COMMENT 'second partition City'
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE
-- Must point at the same HDFS directory the data is uploaded to
-- (/user/yhq/lianjia_2nd_house); plain ASCII quotes are required here.
LOCATION '/user/yhq/lianjia_2nd_house';
第三部分:加载数据
1、 创建目录
# Create the partition directory using Hive's key=value path layout (month_str=06, then city_str=XA).
bin/hdfs dfs -mkdir -p /user/yhq/lianjia_2nd_house/month_str=06/city_str=XA
2、 上传数据
# Upload the local CSV file into the month_str=06/city_str=XA partition directory on HDFS.
bin/hdfs dfs -put /opt/datas/2nd_xa_06_price.csv /user/yhq/lianjia_2nd_house/month_str=06/city_str=XA
第四部分: 修复分区(将手动上传的分区目录注册到 Hive 元数据中)
-- Register the manually created partition in the Hive metastore so queries can see it.
-- With no LOCATION clause, Hive resolves the partition to the
-- month_str=06/city_str=XA subdirectory under the table's location.
-- IF NOT EXISTS keeps the statement safe to re-run.
ALTER TABLE db_lianjia.lianjia_2nd_house
    ADD IF NOT EXISTS PARTITION (month_str = '06', city_str = 'XA');
第五部分:测试
-- Smoke test: count the rows visible in the month_str = '06' partition(s).
-- A non-zero result confirms the uploaded file is readable through the table.
-- month_str is a STRING partition column, so compare against a quoted literal.
SELECT
    COUNT(*)
FROM
    db_lianjia.lianjia_2nd_house
WHERE
    month_str = '06';