1.创建普通表
hive> CREATE TABLE test_table(name STRING,userid INT,ip STRING COMMENT 'ip address of the user')COMMENT 'this is the test table';
OK
test_table
Time taken: 0.492 seconds, Fetched: 1 row(s)
hive> describe test_table;
OKname string
userid int
ip string ip address of the user
Time taken: 0.457 seconds, Fetched: 3 row(s)
hive> describe extended test_table;
name string
userid int
ip string ip address of the user
Detailed Table Information Table(tableName:test_table, dbName:default, owner:caiyong, createTime:1425990662, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:name, type:string, comment:null), FieldSchema(name:userid, type:int, comment:null), FieldSchema(name:ip, type:string, comment:ip address of the user)], location:hdfs://127.0.0.1:8020/user/hive/warehouse/test_table, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[], parameters:{transient_lastDdlTime=1425990662, comment=this is the test table}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
Time taken: 0.133 seconds, Fetched: 5 row(s)
2.创建添加了表分区的表
ROW FORMAT DELIMITED:告诉Hive每一行数据包含多个有界字段
FIELDS TERMINATED BY '\001':指定分隔符
hive> CREATE TABLE test_table_partition(name STRING,userid INT,ip STRING COMMENT 'ip address of the user')COMMENT 'This is the test_table_partition' PARTITIONED BY (part_1 STRING,part_2 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' STORED AS SEQUENCEFILE;
OK
Time taken: 1.207 seconds
hive> show tables 'test_table_p*';
OK
test_table_partition
Time taken: 0.091 seconds, Fetched: 1 row(s)
hive> describe test_table_partition;
OK
name string
userid int
ip string ip address of the user
part_1 string
part_2 string
# Partition Information
# col_name data_type comment
part_1 string
part_2 string
Time taken: 0.428 seconds, Fetched: 11 row(s)
3.创建添加了聚类存储的表
hive> CREATE TABLE test_table_cluster(> name STRING,
> userid INT,
> ip STRING COMMENT 'ip address of the user',
> viewtime INT)COMMENT 'This is the test_table_cluster_store'
> PARTITIONED BY (part_1 STRING,part_2 STRING)
> CLUSTERED BY (userid)
> SORTED BY (viewtime) INTO 2 BUCKETS
> ROW FORMAT DELIMITED
> FIELDS TERMINATED BY '\001'
> COLLECTION ITEMS TERMINATED BY '\002'
> MAP KEYS TERMINATED BY '\003'
> STORED AS SEQUENCEFILE;
OK
Time taken: 1.002 seconds
hive> describe extended test_table_cluster;
OK
name string
userid int
ip string ip address of the user
viewtime int
part_1 string
part_2 string
# Partition Information
# col_name data_type comment
part_1 string
part_2 string
Detailed Table Information Table(tableName:test_table_cluster, dbName:default, owner:caiyong, createTime:1426043343, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:name, type:string, comment:null), FieldSchema(name:userid, type:int, comment:null), FieldSchema(name:ip, type:string, comment:ip address of the user), FieldSchema(name:viewtime, type:int, comment:null), FieldSchema(name:part_1, type:string, comment:null), FieldSchema(name:part_2, type:string, comment:null)], location:hdfs://127.0.0.1:8020/user/hive/warehouse/test_table_cluster, inputFormat:org.apache.hadoop.mapred.SequenceFileInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat, compressed:false, numBuckets:2, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{colelction.delim=, mapkey.delim=, serialization.format=, field.delim=}), bucketCols:[userid], sortCols:[Order(col:viewtime, order:1)], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[FieldSchema(name:part_1, type:string, comment:null), FieldSchema(name:part_2, type:string, comment:null)], parameters:{transient_lastDdlTime=1426043343, comment=This is the test_table_cluster_store}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
Time taken: 0.158 seconds, Fetched: 14 row(s)
4.创建指定存储路径的外部表
hive> CREATE TABLE test_outtable(> name STRING,
> userid INT,
> ip STRING COMMENT 'ip address of the user',
> viewtime INT)COMMENT 'This is the test_table_cluster_store'
> PARTITIONED BY (part_1 STRING,part_2 STRING)
> CLUSTERED BY (userid)
> SORTED BY (viewtime) INTO 2 BUCKETS
> ROW FORMAT DELIMITED
> FIELDS TERMINATED BY '\001'
> STORED AS TEXTFILE
> LOCATION '/user/hive/outtable/';
OK
Time taken: 0.108 seconds
hive> describe extended test_outtable;
OK
name string
userid int
ip string ip address of the user
viewtime int
part_1 string
part_2 string
# Partition Information
# col_name data_type comment
part_1 string
part_2 string
Detailed Table Information Table(tableName:test_outtable, dbName:default, owner:caiyong, createTime:1426043753, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:name, type:string, comment:null), FieldSchema(name:userid, type:int, comment:null), FieldSchema(name:ip, type:string, comment:ip address of the user), FieldSchema(name:viewtime, type:int, comment:null), FieldSchema(name:part_1, type:string, comment:null), FieldSchema(name:part_2, type:string, comment:null)], location:hdfs://127.0.0.1:8020/user/hive/outtable, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:2, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=, field.delim=}), bucketCols:[userid], sortCols:[Order(col:viewtime, order:1)], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[FieldSchema(name:part_1, type:string, comment:null), FieldSchema(name:part_2, type:string, comment:null)], parameters:{transient_lastDdlTime=1426043753, comment=This is the test_table_cluster_store}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
Time taken: 0.151 seconds, Fetched: 14 row(s)