Hive创建表

本文介绍了如何使用Hive创建不同类型的数据表,包括普通表、添加了表分区的表、聚类存储的表以及指定存储路径的外部表。通过示例展示了创建过程,如创建带有字段注释的普通表。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

1.创建普通表


hive> CREATE TABLE test_table(name STRING,userid INT,ip STRING COMMENT 'ip address of the user')COMMENT 'this is the test table';


hive> show tables;
OK
test_table

Time taken: 0.492 seconds, Fetched: 1 row(s)

hive> describe test_table;

OK
name                    string                                      
userid                  int                                         
ip                      string                  ip address of the user

Time taken: 0.457 seconds, Fetched: 3 row(s)

hive> describe extended test_table;

OK
name                    string                                      
userid                  int                                         
ip                      string                  ip address of the user
         
Detailed Table Information    Table(tableName:test_table, dbName:default, owner:caiyong, createTime:1425990662, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:name, type:string, comment:null), FieldSchema(name:userid, type:int, comment:null), FieldSchema(name:ip, type:string, comment:ip address of the user)], location:hdfs://127.0.0.1:8020/user/hive/warehouse/test_table, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[], parameters:{transient_lastDdlTime=1425990662, comment=this is the test table}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)    
Time taken: 0.133 seconds, Fetched: 5 row(s)

2.创建添加了表分区的表


ROW FORMAT DELIMITED:告诉Hive每一行数据包含多个有界字段
FIELDS TERMINATED BY '\001':指定分隔符

hive> CREATE TABLE test_table_partition(name STRING,userid INT,ip STRING COMMENT 'ip address of the user')COMMENT 'This is the test_table_partition' PARTITIONED BY (part_1 STRING,part_2 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' STORED AS SEQUENCEFILE;
OK
Time taken: 1.207 seconds

hive> show tables 'test_table_p*';
OK
test_table_partition
Time taken: 0.091 seconds, Fetched: 1 row(s)

hive> describe test_table_partition;
OK
name                    string                                      
userid                  int                                         
ip                      string                  ip address of the user
part_1                  string                                      
part_2                  string                                      
         
# Partition Information         
# col_name                data_type               comment             
         
part_1                  string                                      
part_2                  string                                      
Time taken: 0.428 seconds, Fetched: 11 row(s)

3.创建添加了聚类存储的表

hive> CREATE TABLE test_table_cluster(
    > name STRING,
    > userid INT,
    > ip STRING COMMENT 'ip address of the user',
    > viewtime INT)COMMENT 'This is the test_table_cluster_store'
    > PARTITIONED BY (part_1 STRING,part_2 STRING)
    > CLUSTERED BY (userid)
    > SORTED BY (viewtime) INTO 2 BUCKETS
    > ROW FORMAT DELIMITED
    > FIELDS TERMINATED BY '\001'
    > COLLECTION ITEMS TERMINATED BY '\002'
    > MAP KEYS TERMINATED BY '\003'
    > STORED AS SEQUENCEFILE
;
OK
Time taken: 1.002 seconds

hive> describe extended test_table_cluster;
OK
name                    string                                      
userid                  int                                         
ip                      string                  ip address of the user
viewtime                int                                         
part_1                  string                                      
part_2                  string                                      
         
# Partition Information         
# col_name                data_type               comment             
         
part_1                  string                                      
part_2                  string                                      
         
Detailed Table Information    Table(tableName:test_table_cluster, dbName:default, owner:caiyong, createTime:1426043343, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:name, type:string, comment:null), FieldSchema(name:userid, type:int, comment:null), FieldSchema(name:ip, type:string, comment:ip address of the user), FieldSchema(name:viewtime, type:int, comment:null), FieldSchema(name:part_1, type:string, comment:null), FieldSchema(name:part_2, type:string, comment:null)], location:hdfs://127.0.0.1:8020/user/hive/warehouse/test_table_cluster, inputFormat:org.apache.hadoop.mapred.SequenceFileInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat, compressed:false, numBuckets:2, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{colelction.delim=, mapkey.delim=, serialization.format=, field.delim=}), bucketCols:[userid], sortCols:[Order(col:viewtime, order:1)], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[FieldSchema(name:part_1, type:string, comment:null), FieldSchema(name:part_2, type:string, comment:null)], parameters:{transient_lastDdlTime=1426043343, comment=This is the test_table_cluster_store}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)    
Time taken: 0.158 seconds, Fetched: 14 row(s)

4.创建指定存储路径的外部表

hive> CREATE TABLE test_outtable(
    >      name STRING,
    >      userid INT,
    >      ip STRING COMMENT 'ip address of the user',
    >      viewtime INT)COMMENT 'This is the test_table_cluster_store'
    >      PARTITIONED BY (part_1 STRING,part_2 STRING)
    >      CLUSTERED BY (userid)
    >      SORTED BY (viewtime) INTO 2 BUCKETS
    >      ROW FORMAT DELIMITED
    >      FIELDS TERMINATED BY '\001'
    >      STORED AS TEXTFILE
    >      LOCATION '/user/hive/outtable/'
;
OK
Time taken: 0.108 seconds

hive> describe extended test_outtable;
OK
name                    string                                      
userid                  int                                         
ip                      string                  ip address of the user
viewtime                int                                         
part_1                  string                                      
part_2                  string                                      
         
# Partition Information         
# col_name                data_type               comment             
         
part_1                  string                                      
part_2                  string                                      
         
Detailed Table Information    Table(tableName:test_outtable, dbName:default, owner:caiyong, createTime:1426043753, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:name, type:string, comment:null), FieldSchema(name:userid, type:int, comment:null), FieldSchema(name:ip, type:string, comment:ip address of the user), FieldSchema(name:viewtime, type:int, comment:null), FieldSchema(name:part_1, type:string, comment:null), FieldSchema(name:part_2, type:string, comment:null)], location:hdfs://127.0.0.1:8020/user/hive/outtable, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:2, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=, field.delim=}), bucketCols:[userid], sortCols:[Order(col:viewtime, order:1)], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[FieldSchema(name:part_1, type:string, comment:null), FieldSchema(name:part_2, type:string, comment:null)], parameters:{transient_lastDdlTime=1426043753, comment=This is the test_table_cluster_store}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)    
Time taken: 0.151 seconds, Fetched: 14 row(s)





评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值