hive> create table inner_table (key string);
hive> load data local inpath '/root/inner_table.dat' into table inner_table;
select * from inner_table;
select count(*) from inner_table;
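Because inner_table is a managed (internal) table, load data copies the local file under Hive's warehouse directory; a quick way to confirm this, assuming the default warehouse root /user/hive/warehouse (controlled by hive.metastore.warehouse.dir):
# hadoop fs -ls /user/hive/warehouse/inner_table   # the loaded inner_table.dat now lives here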
For example, if table test has two partition columns, date and city,
then the HDFS subdirectory for date=20130201, city=bj is:
/warehouse/test/date=20130201/city=bj
and the one for date=20130202, city=sh is:
/warehouse/test/date=20130202/city=sh
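A minimal sketch of how such a table could be declared and one partition loaded (the value column and local file name are made up for illustration; in newer Hive versions the reserved word date may need backticks):
hive> create table test (value string)
      partitioned by (date string, city string);
hive> load data local inpath '/root/test.dat'
      into table test partition (date='20130201', city='bj');   # creates the date=20130201/city=bj subdirectory shown above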
CREATE TABLE tmp_table            # table name
(
  title        string,            # column name and column type
  minimum_bid  double,
  quantity     bigint,
  have_invoice bigint
) COMMENT 'comment: XXX'          # table comment
PARTITIONED BY (pt STRING)        # partition column (if your files are very large, a partitioned table lets you quickly filter out the data for a given partition)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\001';    # field delimiter
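To illustrate that filtering benefit, a query that restricts on the partition column only scans the matching subdirectory (the partition value here is hypothetical):
SELECT title, quantity
FROM tmp_table
WHERE pt = '20130201';   # reads only .../tmp_table/pt=20130201 instead of every partition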
Some related commands
SHOW TABLES;                 # list all tables
SHOW TABLES '*TMP*';         # pattern (fuzzy) matching is supported
SHOW PARTITIONS TMP_TABLE;   # list the partitions of a table
DESCRIBE TMP_TABLE;          # show the table schema
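When DESCRIBE is not detailed enough, Hive also provides:
DESCRIBE FORMATTED TMP_TABLE;   # additionally shows the HDFS location, owner, and table type (MANAGED_TABLE vs EXTERNAL_TABLE)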
hive> create external table external_table1 (key string)
      ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
      location '/home/external';
Create the directory /home/external on HDFS:
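One way to create it from the shell (the directory name matches the table's location clause above):
# hadoop fs -mkdir /home/external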
# hadoop fs -put /home/external_table.dat /home/external    # copy a local file straight into the table's location
LOAD DATA INPATH '/home/external_table1.dat' INTO TABLE external_table1;   # without LOCAL, the path is on HDFS and the file is moved into the table
select * from external_table1;
select count(*) from external_table1;
drop table external_table1;
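Unlike dropping a managed table, dropping an external table removes only the metastore entry; the files under location stay on HDFS, which you can confirm from the shell:
# hadoop fs -ls /home/external   # the data files are still there after drop table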
create table bucket_table(id string) clustered by(id) into 4 buckets;
set hive.enforce.bucketing = true;    # make inserts write one file per bucket
insert into table bucket_table select name from stu;        # append
insert overwrite table bucket_table select name from stu;   # replace the existing data
select * from bucket_table tablesample(bucket 1 out of 4 on id);   # sample the first of the 4 buckets
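tablesample(bucket x out of y on id) reads every y-th bucket starting from bucket x, so on this 4-bucket table the query above scans roughly a quarter of the data; a variant for illustration:
select * from bucket_table tablesample(bucket 1 out of 2 on id);   # with 4 buckets, reads buckets 1 and 3 (about half the data)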