介绍
Trafodion中用CREATE TABLE语句创建一个Trafodion SQL表,这个表是底层HBase表的映射。
语法
CREATE [VOLATILE] TABLE [IF NOT EXISTS] table
table-spec
[SALT USING num PARTITIONS [ON (column[, column]...)]]
[STORE BY {PRIMARY KEY | (key-column-list)}]
[DIVISION BY]
[HBASE_OPTIONS (hbase-options-list)]
[ATTRIBUTES ALIGNED FORMAT]
[LOAD IF EXISTS | NO LOAD]
[AS select-query]
CREATE [VOLATILE] TABLE [IF NOT EXISTS] table
like-spec
[SALT USING num PARTITIONS [ON (column[, column]...)]]
语法描述
- VOLATILE
表示创建只在对应的session中生效,session结束后表自动消失。
- IF NOT EXITS
当表不存在时存在,存在不创建。
- SALT USING num PARTITIONS [ON (column[, column]...)]
将表预分区为多个region,Salting在行键的最前面添加一个Hash值作为行键的一部分,可以防止Sequential键的热点问题。num的范围在2~2014。如果不指定列表,默认使用主键中的所有列。隐式的添加一个新的列"_SALT_",列值自动计算,"_SALT_"作为聚集键的最开头的一部分。
SALT后面接LIKE表示继承源表的盐粒分布,一般在创建索引中用的较多。
- STORE BY {PRIMARY KEY | (key-column-list)}
指定由哪些列组成聚簇键。聚簇键决定行数据在物理上按顺序存储。主键也是聚簇键。主键一般还用于唯一性保证,如果不需要用主键时,可以用STORE BY来决定数据存储的关联性。当表没有主键的时候,会自动创建一个SYSKEY,其类型为LARGEINT。
- DIVISION BY
为了解决冷热数据的问题,比如经常查询最近几天的数据,那以前的数据就是冷数据。DIVISION BY可以实现冷热数据分离,老数据集在key值较小的一端,新数据则集中在key值较大的一端。DIVISION会隐式地添加新列"_DIVISION_1_"并作为聚集键的一部分,紧跟在"_SALT_"列之后。
- HBASE_OPTIONS (hbase-option = 'value'[, hbase-option = 'value']...)
hbase-option有很多种,详细请见Trafodion SQL手册。常用的包括compression,data_block_encoding,memstore_flush_size。HBase Options语句在建表的时候被直接传递到HBase层。
- ATTRIBUTES ALIGNED FORMAT
默认情况下每一个SQL列在HBase中也是一列,存储在HBase的一个cell中,Aligned rows把所有列值存储在单独的一个HBase cell中,从而实现更好的插入性能,查询也更优
- LOAD IF EXISTS
当做CREATE TABLE ... AS ...时,如果源表有数据,会把源表中的数据一起加载进来。
- NO LOAD
与上述相反,不论源表是否有数据,都不加载数据,而是只创建表结构。
- LIKE soure-table [include-option]...
创建一个表与源表结构相同,但主键、非空及分区等会被忽略。当然,可以通过include-option另外添加进来,include-option包括:WITH CONSTRAINTS,WITH PARTITIONS,WITHOUT DIVISION,WITHOUT SALT等。
样例
- VOLATILE
Session1中创建一张,在Session2中查不到
SQL>--SESSION1
SQL>create volatile table test1(a int, b varchar(10));
--- SQL operation complete.
SQL>select * from test1;
--- 0 row(s) selected.
SQL>--SESSION2
SQL>select * from test1;
*** ERROR[4082] Object TRAFODION.SEABASE.TEST1 does not exist or is inaccessible. [2016-11-02 15:01:52]
- SALT USING num PARTITIONS [ON (column[, column]...)]
SQL>create table test1(a int not null,
+>b varchar(10),
+>primary key (a))
+>salt using 4 partitions;
--- SQL operation complete.
SQL>invoke test1;
-- Definition of Trafodion table TRAFODION.SEABASE.TEST1
-- Definition current Wed Nov 2 15:07:08 2016
(
A INT NO DEFAULT NOT NULL NOT DROPPABLE
SERIALIZED
, B VARCHAR(10) CHARACTER SET ISO88591 COLLATE
DEFAULT DEFAULT NULL SERIALIZED
, "_SALT_" INT UNSIGNED NO DEFAULT NOT NULL NOT
DROPPABLE SERIALIZED
)
PRIMARY KEY ("_SALT_" ASC, A ASC)
--- SQL operation complete.
- STORE BY {PRIMARY KEY | (key-column-list)}
通过以下结果可以发现,如果表中没有主键,则STORE BY会自动生成一个SYSKEY,即系统定义的簇键。
SQL>create table test2(a int not null,
+>b varchar(10),
+>primary key (a))
+>store by primary key;
--- SQL operation complete.
SQL>invoke test2;
-- Definition of Trafodion table TRAFODION.SEABASE.TEST2
-- Definition current Wed Nov 2 15:13:57 2016
(
A INT NO DEFAULT NOT NULL NOT DROPPABLE
SERIALIZED
, B VARCHAR(10) CHARACTER SET ISO88591 COLLATE
DEFAULT DEFAULT NULL SERIALIZED
)
PRIMARY KEY (A ASC)
--- SQL operation complete.
SQL>create table test3(a int not null,
+>b varchar(10))
+>store by (a);
--- SQL operation complete.
SQL>invoke test3;
-- Definition of Trafodion table TRAFODION.SEABASE.TEST3
-- Definition current Wed Nov 2 15:17:10 2016
(
SYSKEY LARGEINT NO DEFAULT NOT NULL NOT DROPPABLE
NOT SERIALIZED
, A INT NO DEFAULT NOT NULL NOT DROPPABLE
SERIALIZED
, B VARCHAR(10) CHARACTER SET ISO88591 COLLATE
DEFAULT DEFAULT NULL SERIALIZED
)
--- SQL operation complete.
- HBASE_OPTIONS (hbase-option = 'value'[, hbase-option = 'value']...)
SQL>create table test1(a int not null,
+>b varchar(10))
+>hbase_options (data_block_encoding='FAST_DIFF',
+>compression='SNAPPY',
+>memstore_flush_size='1073741824');
--- SQL operation complete.
SQL>showddl test1;
CREATE TABLE TRAFODION.SEABASE.TEST1
(
A INT NO DEFAULT NOT NULL NOT DROPPABLE
SERIALIZED
, B VARCHAR(10) CHARACTER SET ISO88591 COLLATE
DEFAULT DEFAULT NULL SERIALIZED
)
HBASE_OPTIONS
(
DATA_BLOCK_ENCODING = 'FAST_DIFF',
COMPRESSION = 'SNAPPY',
MEMSTORE_FLUSH_SIZE = '1073741824'
)
;
--- SQL operation complete.
- LOAD IF EXISTS
通过下面结果,可以发现源表中的数据也一同插入到新表中。
SQL>insert into test1 values(1,'tim'),(2,'lance');
--- 2 row(s) inserted.
SQL>create table test2 load if exists as select * from test1;
--- 2 row(s) inserted.
SQL>select * from test2;
A B
----------- ----------
1 tim
2 lance
--- 2 row(s) selected.
- NO LOAD
通过下面结果,可以发现源表中的数据并没有插入到新表中。
SQL>select * from test1;
A B
----------- ----------
1 tim
2 lance
--- 2 row(s) selected.
SQL>create table test2 no load as select * from test1;
--- 0 row(s) inserted.
SQL>select * from test2;
--- 0 row(s) selected.
- ATTRIBUTES ALIGNED FORMAT
创建两个相同的表,唯一区别是一个有ATTRIBUTES ALIGNED FORMAT属性。通过以下结果可以发现,ATTRIBUTES ALIGNED FORMAT表在HBase中所有的列都存在在单独的HBase cell中。
SQL>create table test1 (a int not null, b varchar(10));
insert into test1 values(1,'A'),(2,'B);
SQL>create table test2(a int not null, b varchar(10)) attributes aligned format;
insert into test2 values(1,'A'),(2,'B');
hbase(main):003:0> scan 'TRAFODION.SEABASE.TEST1'
ROW COLUMN+CELL
\x85\x1Cf\x806\xB21\xC1 column=#1:\x01, timestamp=1478073465158, value=\xC11\xB26\x80f\x1C\x05
\x85\x1Cf\x806\xB21\xC1 column=#1:\x02, timestamp=1478073465158, value=\x80\x00\x00\x01
\x85\x1Cf\x806\xB21\xC1 column=#1:\x03, timestamp=1478073465158, value=\x00A
\x85\x1Cf\x806\xB2J/ column=#1:\x01, timestamp=1478073465160, value=/J\xB26\x80f\x1C\x05
\x85\x1Cf\x806\xB2J/ column=#1:\x02, timestamp=1478073465160, value=\x80\x00\x00\x02
\x85\x1Cf\x806\xB2J/ column=#1:\x03, timestamp=1478073465160, value=\x00B
2 row(s) in 0.3890 seconds
hbase(main):004:0> scan 'TRAFODION.SEABASE.TEST2'
ROW COLUMN+CELL
\x85\x1Cf\x807\x0D\x0C\xD1 column=#1:\x01, timestamp=1478073471112, value=\x10\xC0\x00\x00\x0C\x00\x00\x00\x1C\x00\x00\x00\x00\x00\x00\x00\xD1\x0C\x0D7\x80f\x1C\x05\x01\x00\x00\
x00\x01\x00\x00\x00A\x00\x00\x00
\x85\x1Cf\x807\x0D$; column=#1:\x01, timestamp=1478073471114, value=\x10\xC0\x00\x00\x0C\x00\x00\x00\x1C\x00\x00\x00\x00\x00\x00\x00;$\x0D7\x80f\x1C\x05\x02\x00\x00\x00\x0
1\x00\x00\x00B\x00\x00\x00
2 row(s) in 0.0160 seconds
- DIVISION BY
通过以下结果可以发现,DIVISION BY语句会在表中隐式地添加列"_DIVISION_1_",作为聚集键的一部分。
SQL>create table test1(a int not null,
+>b varchar(10),
+>c timestamp not null)
+>store by (a,c)
+>division by (date_part('yearweek', c));
--- SQL operation complete.
SQL>invoke test1;
-- Definition of Trafodion table TRAFODION.SEABASE.TEST1
-- Definition current Wed Nov 2 16:40:36 2016
(
SYSKEY LARGEINT NO DEFAULT NOT NULL NOT DROPPABLE
NOT SERIALIZED
, A INT NO DEFAULT NOT NULL NOT DROPPABLE
SERIALIZED
, B VARCHAR(10) CHARACTER SET ISO88591 COLLATE
DEFAULT DEFAULT NULL SERIALIZED
, C TIMESTAMP(6) NO DEFAULT NOT NULL NOT
DROPPABLE NOT SERIALIZED
, "_DIVISION_1_" NUMERIC(6, 0) NO DEFAULT NOT NULL NOT
DROPPABLE SERIALIZED
)
--- SQL operation complete.
建表样本语法
CREATE TABLE TRAFODION.SEABASE.EDATA
(
EID LARGEINT GENERATED BY DEFAULT AS IDENTITY
( START WITH 1 INCREMENT BY 1 MAXVALUE 9223372036854775806 MINVALUE 1
CACHE 25 NO CYCLE LARGEINT ) NOT NULL NOT DROPPABLE NOT SERIALIZED
, CTIME TIMESTAMP(6) DEFAULT CURRENT NOT NULL NOT
DROPPABLE NOT SERIALIZED
, MAC VARCHAR(64 CHARS) CHARACTER SET UTF8
COLLATE DEFAULT DEFAULT NULL NOT SERIALIZED
, ADDR INT DEFAULT NULL NOT SERIALIZED
, TITLE VARCHAR(64 CHARS) CHARACTER SET UTF8
COLLATE DEFAULT DEFAULT NULL NOT SERIALIZED
, O_C SMALLINT DEFAULT NULL NOT SERIALIZED
, ENABLE_NET_CTRL SMALLINT DEFAULT NULL NOT SERIALIZED
, ALARM INT DEFAULT NULL NOT SERIALIZED
, MODEL VARCHAR(64 CHARS) CHARACTER SET UTF8
COLLATE DEFAULT DEFAULT NULL NOT SERIALIZED
, SPECIFICATION VARCHAR(64 CHARS) CHARACTER SET UTF8
COLLATE DEFAULT DEFAULT NULL NOT SERIALIZED
, VERSION VARCHAR(64 CHARS) CHARACTER SET UTF8
COLLATE DEFAULT DEFAULT NULL NOT SERIALIZED
, A_A DOUBLE PRECISION DEFAULT NULL NOT
SERIALIZED
, A_LD DOUBLE PRECISION DEFAULT NULL NOT
SERIALIZED
, A_T DOUBLE PRECISION DEFAULT NULL NOT
SERIALIZED
, A_V DOUBLE PRECISION DEFAULT NULL NOT
SERIALIZED
, A_W DOUBLE PRECISION DEFAULT NULL NOT
SERIALIZED
, POWER DOUBLE PRECISION DEFAULT NULL NOT
SERIALIZED
, MXDW DOUBLE PRECISION DEFAULT NULL NOT
SERIALIZED
, MXGG DOUBLE PRECISION DEFAULT NULL NOT
SERIALIZED
, MXGL DOUBLE PRECISION DEFAULT NULL NOT
SERIALIZED
, MXGW DOUBLE PRECISION DEFAULT NULL NOT
SERIALIZED
, MXGY DOUBLE PRECISION DEFAULT NULL NOT
SERIALIZED
, MXLD DOUBLE PRECISION DEFAULT NULL NOT
SERIALIZED
, MXQY DOUBLE PRECISION DEFAULT NULL NOT
SERIALIZED
, CONTROL SMALLINT DEFAULT NULL NOT SERIALIZED
, VISIBILITY SMALLINT DEFAULT NULL NOT SERIALIZED
)
STORE BY (CTIME, EID)
DIVISION BY (DATE_PART('YEARWEEK',CTIME))
SALT USING 12 PARTITIONS
ATTRIBUTES ALIGNED FORMAT
HBASE_OPTIONS
(
DATA_BLOCK_ENCODING = 'FAST_DIFF',
COMPRESSION = 'SNAPPY',
MEMSTORE_FLUSH_SIZE = '1073741824'
)
;