(5)bit_and(expr):将所有非空值按位AND,如果没有返回null
eg:
SELECT bit_and(col)FROMVALUES(3),(5)AS tab(col);+------------+|bit_and(col)|+------------+|1|+------------+
① 先转成二进制数
3:0115:101
② 按位AND(都为1则1,否则为0)
011101-----001
③ 将二进制数转为十进制
001:1
(6)bit_or(col):将所有非空值按位取OR,如果没有返回null
eg:
SELECT bit_or(col)FROMVALUES(3),(5)AS tab(col);+-----------+|bit_or(col)|+-----------+|7|+-----------+
① 十进制数转成二进制
3:0115:101
② 按位OR(有1为1,无1为0)
011101-----111
③ 将二进制数转为十进制
111:7
(7)bit_xor(col):将所有非空值按位取XOR,如果没有返回null
eg:
SELECT bit_xor(col)FROMVALUES(3),(5)AS tab(col);+------------+|bit_xor(col)|+------------+|6|+------------+
① 十进制数转成二进制
3:0115:101
② 按位XOR(相同为0,否则为1)
011101-----110
③ 将二进制数转为十进制
110:6
(1)cume_dist():计算一个值相对于分区中所有值的位置
eg:
SELECT a, b, cume_dist()OVER(PARTITIONBY a ORDERBY b)FROMVALUES('A1',2),('A1',1),('A2',3),('A1',1) tab(a, b);+---+---+--------------------------------------------------------------------------------------------------------------+| a| b|cume_dist()OVER(PARTITIONBY a ORDERBY b ASC NULLS FIRST RANGE BETWEENUNBOUNDEDPRECEDINGANDCURRENTROW)|+---+---+--------------------------------------------------------------------------------------------------------------+| A1|1|0.6666666666666666|| A1|1|0.6666666666666666|| A1|2|1.0|| A2|3|1.0|+---+---+--------------------------------------------------------------------------------------------------------------+
(2)dense_rank():计算一组值中某个值的排名,有重复值,但不会产生间隔(1,1,2,3,3,4)
eg:
SELECT a, b, dense_rank(b)OVER(PARTITIONBY a ORDERBY b)FROMVALUES('A1',2),('A1',1),('A2',3),('A1',1) tab(a, b);+---+---+--------------------------------------------------------------------------------------------------------------+| a| b|DENSE_RANK()OVER(PARTITIONBY a ORDERBY b ASC NULLS FIRSTROWSBETWEENUNBOUNDEDPRECEDINGANDCURRENTROW)|+---+---+--------------------------------------------------------------------------------------------------------------+| A1|1|1|| A1|1|1|| A1|2|2|| A2|3|1|+---+---+--------------------------------------------------------------------------------------------------------------+
(3)rank():计算一组值中某个值的排名,有重复值,会产生间隔(1,1,3,4,4,6)
eg:
SELECT a, b, rank(b)OVER(PARTITIONBY a ORDERBY b)FROMVALUES('A1',2),('A1',1),('A2',3),('A1',1) tab(a, b);+---+---+--------------------------------------------------------------------------------------------------------+| a| b|RANK()OVER(PARTITIONBY a ORDERBY b ASC NULLS FIRSTROWSBETWEENUNBOUNDEDPRECEDINGANDCURRENTROW)|+---+---+--------------------------------------------------------------------------------------------------------+| A1|1|1|| A1|1|1|| A1|2|3|| A2|3|1|+---+---+--------------------------------------------------------------------------------------------------------+
(4)row_number():计算一组值中某个值的排名,无重复值,不产生间隔(1,2,3,4,5,6)
eg:
SELECT a, b, row_number()OVER(PARTITIONBY a ORDERBY b)FROMVALUES('A1',2),('A1',1),('A2',3),('A1',1) tab(a, b);+---+---+--------------------------------------------------------------------------------------------------------------+| a| b|row_number()OVER(PARTITIONBY a ORDERBY b ASC NULLS FIRSTROWSBETWEENUNBOUNDEDPRECEDINGANDCURRENTROW)|+---+---+--------------------------------------------------------------------------------------------------------------+| A1|1|1|| A1|1|2|| A1|2|3|| A2|3|1|+---+---+--------------------------------------------------------------------------------------------------------------+
(5)lag(input[,offset[,default]]):返回窗口中当前行向前偏移offset行的input值,当向前不足offset行时,填充default值
eg:
SELECT a, b, lag(b)OVER(PARTITIONBY a ORDERBY b)FROMVALUES('A1',2),('A1',1),('A2',3),('A1',1) tab(a, b);+---+---+-----------------------------------------------------------------------------------------------------------+| a| b|lag(b,1,NULL)OVER(PARTITIONBY a ORDERBY b ASC NULLS FIRSTROWSBETWEEN-1FOLLOWINGAND-1FOLLOWING)|+---+---+-----------------------------------------------------------------------------------------------------------+| A1|1|NULL|| A1|1|1|| A1|2|1|| A2|3|NULL|+---+---+-----------------------------------------------------------------------------------------------------------+
(6)lead(input[,offset[,default]]):返回窗口中当前行向后偏移offset行的input值,当向后不足offset行时,填充default值
eg:
SELECT a, b, lead(b)OVER(PARTITIONBY a ORDERBY b)FROMVALUES('A1',2),('A1',1),('A2',3),('A1',1) tab(a, b);+---+---+----------------------------------------------------------------------------------------------------------+| a| b|lead(b,1,NULL)OVER(PARTITIONBY a ORDERBY b ASC NULLS FIRSTROWSBETWEEN1FOLLOWINGAND1FOLLOWING)|+---+---+----------------------------------------------------------------------------------------------------------+| A1|1|1|| A1|1|2|| A1|2|NULL|| A2|3|NULL|+---+---+----------------------------------------------------------------------------------------------------------+
(7)nth_value(input[,offset]):返回窗口中offset行的input值
eg:
SELECT a, b, nth_value(b,2)OVER(PARTITIONBY a ORDERBY b)FROMVALUES('A1',2),('A1',1),('A2',3),('A1',1) tab(a, b);+---+---+------------------------------------------------------------------------------------------------------------------+| a| b|nth_value(b,2)OVER(PARTITIONBY a ORDERBY b ASC NULLS FIRST RANGE BETWEENUNBOUNDEDPRECEDINGANDCURRENTROW)|+---+---+------------------------------------------------------------------------------------------------------------------+| A1|1|1|| A1|1|1|| A1|2|1|| A2|3|NULL|+---+---+------------------------------------------------------------------------------------------------------------------+
(8)ntile(n):将窗口中的数据按照顺序切分为n片,返回当前切片值
eg:
SELECT a, b, ntile(2)OVER(PARTITIONBY a ORDERBY b)FROMVALUES('A1',2),('A1',1),('A2',3),('A1',1) tab(a, b);+---+---+----------------------------------------------------------------------------------------------------------+| a| b|ntile(2)OVER(PARTITIONBY a ORDERBY b ASC NULLS FIRSTROWSBETWEENUNBOUNDEDPRECEDINGANDCURRENTROW)|+---+---+----------------------------------------------------------------------------------------------------------+| A1|1|1|| A1|1|1|| A1|2|2|| A2|3|1|+---+---+----------------------------------------------------------------------------------------------------------+
(9)percent_rank():计算窗口中当前值的百分比排名
eg:
SELECT a, b, percent_rank(b)OVER(PARTITIONBY a ORDERBY b)FROMVALUES('A1',2),('A1',1),('A2',3),('A1',1) tab(a, b);+---+---+----------------------------------------------------------------------------------------------------------------+| a| b|PERCENT_RANK()OVER(PARTITIONBY a ORDERBY b ASC NULLS FIRSTROWSBETWEENUNBOUNDEDPRECEDINGANDCURRENTROW)|+---+---+----------------------------------------------------------------------------------------------------------------+| A1|1|0.0|| A1|1|0.0|| A1|2|1.0|| A2|3|0.0|+---+---+----------------------------------------------------------------------------------------------------------------+
CREATE { DATABASE|SCHEMA } [IFNOTEXISTS] database_name
[COMMENT database_comment ][ LOCATION database_directory ][WITH DBPROPERTIES ( property_name = property_value [,...])]
eg:
CREATEDATABASEIFNOTEXISTS db_name
COMMENT'This is my first db'
LOCATION '/spark_db'WITH DBPROPERTIES(create_time='2024-10-28');
2.1.2、创建表
① 创建数据源表
CREATETABLE[IFNOTEXISTS] table_identifier
[( col_name1 col_type1 [COMMENT col_comment1 ],...)]USING data_source
[ OPTIONS ( key1=val1, key2=val2,...)][ PARTITIONED BY( col_name1, col_name2,...)][CLUSTEREDBY( col_name3, col_name4,...)[ SORTED BY( col_name [ASC|DESC],...)]INTO num_buckets BUCKETS ][ LOCATION path ][COMMENT table_comment ][ TBLPROPERTIES ( key1=val1, key2=val2,...)][AS select_statement ]
eg:
CREATETABLEIFNOTEXISTS tbl_name(
id INT,
name STRING,
class STRING
)USING org.apache.spark.sql.jdbc
OPTIONS(
url="jdbc:mysql://localhost:3306/test",
dbtable="test.student",
driver="com.mysql.cj.jdbc.Driver",user="root",
password="123456")
PARTITIONED BY(class)CLUSTEREDBY(id)
SORTED BY(id)INTO6 BUCKETS
LOCATION "/spark_db/"COMMENT"my first jdbc table"
TBLPROPERTIES (create_time="2024-10-28");
② 创建hive格式表
CREATE[ EXTERNAL ]TABLE[IFNOTEXISTS] table_identifier
[( col_name1[:] col_type1 [COMMENT col_comment1 ],...)][COMMENT table_comment ][ PARTITIONED BY( col_name2[:] col_type2 [COMMENT col_comment2 ],...)|( col_name1, col_name2,...)][CLUSTEREDBY( col_name1, col_name2,...)[ SORTED BY( col_name1 [ASC|DESC], col_name2 [ASC|DESC],...)]INTO num_buckets BUCKETS ][ROW FORMAT row_format ][ STORED AS file_format ][ LOCATION path ][ TBLPROPERTIES ( key1=val1, key2=val2,...)][AS select_statement ]
row_format:
: SERDE serde_class [WITH SERDEPROPERTIES (k1=v1, k2=v2,...)]| DELIMITED [FIELDSTERMINATEDBY fields_terminated_char [ESCAPEDBY escaped_char ]][ COLLECTION ITEMS TERMINATEDBY collection_items_terminated_char ][ MAP KEYSTERMINATEDBY map_key_terminated_char ][LINESTERMINATEDBY row_terminated_char ][NULL DEFINED AS null_char ]
eg:(ESCAPEDBY'\n'的意思是\n应该被转义)CREATE EXTERNAL TABLE family(
name STRING,
friends ARRAY<STRING>,
children MAP<STRING,INT>,
address STRUCT<street:STRING, city:STRING>)ROW FORMAT DELIMITED FIELDSTERMINATEDBY','ESCAPEDBY'\n'
COLLECTION ITEMS TERMINATEDBY'_'
MAP KEYSTERMINATEDBY':'LINESTERMINATEDBY'\n'NULL DEFINED AS'foonull'
STORED AS TEXTFILE
LOCATION '/tmp/family/';
③ 使用like复制已存在的表结构
CREATETABLE[IFNOTEXISTS] table_identifier LIKE source_table_identifier
USING data_source
[ROW FORMAT row_format ][ STORED AS file_format ][ TBLPROPERTIES ( key1=val1, key2=val2,...)][ LOCATION path ]
eg:
CREATETABLE Student_Dupli like Student USING CSV;CREATETABLE Student_Dupli like Student
ROW FORMAT DELIMITED FIELDSTERMINATEDBY','
STORED AS TEXTFILE
TBLPROPERTIES ('owner'='xxxx');
2.1.3、创建视图
CREATE[ORREPLACE][[GLOBAL]TEMPORARY]VIEW[IFNOTEXISTS] view_identifier
create_view_clauses AS query
eg:
CREATEORREPLACEVIEW experienced_employee
(ID COMMENT'Unique identification number', Name)COMMENT'View for experienced employees'ASSELECT id, name FROM all_employee
WHERE working_years >5;
eg:
CREATEGLOBALTEMPORARYVIEWIFNOTEXISTS subscribed_movies
ASSELECT mo.member_id, mb.full_name, mo.movie_title
FROM movies AS mo INNERJOIN members AS mb
ON mo.member_id = mb.id;
CREATE[ORREPLACE][TEMPORARY]FUNCTION[IFNOTEXISTS]
function_name AS class_name [ resource_locations ];
eg:
CREATEFUNCTION simple_udf AS'SimpleUdf'USING JAR '/tmp/SimpleUdf.jar';
2.2、DML
2.2.1、使用INSERT向表中插入数据
INSERT[INTO| OVERWRITE ][TABLE] table_identifier [ partition_spec ][( column_list )]
{ VALUES( { value|NULL } [,...])[,(...)]| query };
eg:
INSERTINTO students VALUES('Amy Smith','123 Park Ave, San Jose',111111);INSERTINTO students VALUES('Bob Brown','456 Taylor St, Cupertino',222222),('Cathy Johnson','789 Race Ave, Palo Alto',333333);
INSERTINTO[TABLE] table_identifier REPLACEWHERE boolean_expression query;
eg:
INSERTINTO students PARTITION(student_id =444444)SELECT name, address FROM persons WHERE name ="Dora Williams";INSERTINTO students TABLE visiting_students;INSERTINTO students
FROM applicants SELECT name, address, student_id WHERE qualified =true;INSERTINTO persons REPLACEWHERE ssn =123456789SELECT*FROM persons2
解释:将person2中的数据替换person中ssn=123456789的数据
hive格式
eg:
INSERT OVERWRITE LOCAL DIRECTORY '/tmp/destination'
STORED AS orc
SELECT*FROM test_table;INSERT OVERWRITE LOCAL DIRECTORY '/tmp/destination'ROW FORMAT DELIMITED FIELDSTERMINATEDBY','SELECT*FROM test_table;
2.2.3、使用LOAD DATA将数据加载到表
LOAD DATA [ LOCAL ] INPATH path [ OVERWRITE ] INTO TABLE table_identifier [ partition_spec ]
eg:
LOAD DATA LOCAL INPATH '/user/hive/warehouse/students' OVERWRITE INTO TABLE test_load;
LOAD DATA LOCAL INPATH '/user/hive/warehouse/test_partition/c2=2/c3=3'
OVERWRITE INTO TABLE test_load_partition PARTITION (c2=2, c3=3);
2.3、查询
2.3.1、WITH
WITH common_table_expression [ , ... ]
其中 common_table_expression 为
expression_name [ ( column_name [ , ... ] ) ] [ AS ] ( query );
eg:
WITH
t AS (SELECT 1),
t2 AS (
WITH t AS (SELECT 2)
SELECT * FROM t
);
2.3.2、CLUSTER BY
对查询数据进行分区,然后对每个分区中的数据进行排序(等同于先执行DISTRIBUTE BY,再执行SORT BY)
CLUSTER BY { expression [ , ... ] }
eg:
SELECT age, name FROM person CLUSTER BY age;
+---+-------+
|age| name|
+---+-------+
| 18| John A|
| 18| Anil B|
| 25|Zen Hui|
| 25| Mike A|
| 16|Shone S|
| 16| Jack N|
+---+-------+
2.3.3、DISTRIBUTE BY
对查询数据重新分区
DISTRIBUTE BY { expression [ , ... ] };
eg:
SELECT age, name FROM person DISTRIBUTE BY age;
+---+-------+
|age| name|
+---+-------+
| 25|Zen Hui|
| 25| Mike A|
| 18| John A|
| 18| Anil B|
| 16|Shone S|
| 16| Jack N|
+---+-------+
3.3.4、GROUP BY
GROUP BY group_expression [ , group_expression [ , ... ] ] [ WITH { ROLLUP | CUBE } ];
GROUP BY { group_expression | { ROLLUP | CUBE | GROUPING SETS } (grouping_set [ , ...]) } [ , ... ];
聚合函数定义为
aggregate_name ( [ DISTINCT ] expression [ , ... ] ) [ FILTER ( WHERE boolean_expression ) ];
参数解释:
GROUPING SETS:将多个分组条件合并为一个集合,相当于多个group by的结果UNION ALL。GROUP BY GROUPING SETS((name,age),(name))相当于GROUP BY name,age UNION ALL GROUP BY name。
ROLLUP:是GROUPING SETS的简写形式。例如GROUP BY warehouse,product WITH ROLLUP或GROUP BY ROLLUP(warehouse,product)等同于GROUP BY GROUPING SETS((warehouse,product),(warehouse))。ROLLUP规范的N个元素会生成N+1个GROUPING SETS。
CUBE:是GROUPING SETS的简写形式。例如GROUP BY warehouse,product WITH CUBE或GROUP BY CUBE(warehouse,product)等同于GROUP BY GROUPING SETS((warehouse,product),(warehouse),(product),())。CUBE规范的N个元素会生成2^N个GROUPING SETS。
FILTER:过滤输入行,其中where子句为true的行将传递给聚合函数,其他行将被丢弃。
eg:
SELECT id, sum(quantity) FILTER (
WHERE car_model IN ('Honda Civic', 'Honda CRV')
) AS `sum(quantity)` FROM dealer
GROUP BY id ORDER BY id;
+---+-------------+
| id|sum(quantity)|
+---+-------------+
|100| 17|
|200| 23|
|300| 5|
+---+-------------+
SELECT city, car_model, sum(quantity) AS sum FROM dealer
GROUP BY GROUPING SETS ((city, car_model), (city), (car_model), ())
ORDER BY city;
+---------+------------+---+
| city| car_model|sum|
+---------+------------+---+
| null| null| 78|
| null| HondaAccord| 33|
| null| HondaCRV| 10|
| null| HondaCivic| 35|
| Dublin| null| 33|
| Dublin| HondaAccord| 10|
| Dublin| HondaCRV| 3|
| Dublin| HondaCivic| 20|
| Fremont| null| 32|
| Fremont| HondaAccord| 15|
| Fremont| HondaCRV| 7|
| Fremont| HondaCivic| 10|
| San Jose| null| 13|
| San Jose| HondaAccord| 8|
| San Jose| HondaCivic| 5|
+---------+------------+---+
SELECT city, car_model, sum(quantity) AS sum FROM dealer
GROUP BY city, car_model WITH ROLLUP
ORDER BY city, car_model;
+---------+------------+---+
| city| car_model|sum|
+---------+------------+---+
| null| null| 78|
| Dublin| null| 33|
| Dublin| HondaAccord| 10|
| Dublin| HondaCRV| 3|
| Dublin| HondaCivic| 20|
| Fremont| null| 32|
| Fremont| HondaAccord| 15|
| Fremont| HondaCRV| 7|
| Fremont| HondaCivic| 10|
| San Jose| null| 13|
| San Jose| HondaAccord| 8|
| San Jose| HondaCivic| 5|
+---------+------------+---+
SELECT city, car_model, sum(quantity) AS sum FROM dealer
GROUP BY city, car_model WITH CUBE
ORDER BY city, car_model;
+---------+------------+---+
| city| car_model|sum|
+---------+------------+---+
| null| null| 78|
| null| HondaAccord| 33|
| null| HondaCRV| 10|
| null| HondaCivic| 35|
| Dublin| null| 33|
| Dublin| HondaAccord| 10|
| Dublin| HondaCRV| 3|
| Dublin| HondaCivic| 20|
| Fremont| null| 32|
| Fremont| HondaAccord| 15|
| Fremont| HondaCRV| 7|
| Fremont| HondaCivic| 10|
| San Jose| null| 13|
| San Jose| HondaAccord| 8|
| San Jose| HondaCivic| 5|
+---------+------------+---+
3.3.5、提示
提示为用户提供了一种方法,可以建议sparksql使用特定方法来生成执行计划
/*+ hint [ , ... ] */
分区提示:允许用户指定spark应该遵循的分区策略
COALESCE:使用coalesce提示将分区数量减少到指定的分区数量。将分区数量作为参数
REPARTITION:使用指定的分区表达式将分区重新分区到指定的分区数量。将分区数量、列名作为参数
REPARTITION_BY_RANGE:使用指定的分区表达式将分区重新分区到指定的分区数量。将列名和分区数量作为参数
REBALANCE:重新平衡查询结果输出分区,以便每个分区的大小都合理。将列名作为参数
eg:
SELECT /*+ COALESCE(3) */ * FROM t;
SELECT /*+ REPARTITION(3) */ * FROM t;
SELECT /*+ REPARTITION(c) */ * FROM t;
SELECT /*+ REPARTITION(3, c) */ * FROM t;
SELECT /*+ REPARTITION_BY_RANGE(c) */ * FROM t;
SELECT /*+ REPARTITION_BY_RANGE(3, c) */ * FROM t;
SELECT /*+ REBALANCE */ * FROM t;
SELECT /*+ REBALANCE(3) */ * FROM t;
SELECT /*+ REBALANCE(c) */ * FROM t;
SELECT /*+ REBALANCE(3, c) */ * FROM t;
连接提示:允许用户指定spark应该使用的连接策略
BROADCAST:使用广播连接。具有提示的连接侧将被广播,如果两侧都有广播,则大小较小的一侧将被广播。
MERGE:使用shuffle排序合并连接。
SHUFFLE_HASH:使用shuffle哈希连接、
SHUFFLE_REPLICATE_NL:使用shuffle和复制嵌套循环连接。
eg:
-- Join Hints for broadcast join
SELECT /*+ BROADCAST(t1) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;
SELECT /*+ BROADCASTJOIN (t1) */ * FROM t1 left JOIN t2 ON t1.key = t2.key;
SELECT /*+ MAPJOIN(t2) */ * FROM t1 right JOIN t2 ON t1.key = t2.key;
-- Join Hints for shuffle sort merge join
SELECT /*+ SHUFFLE_MERGE(t1) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;
SELECT /*+ MERGEJOIN(t2) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;
SELECT /*+ MERGE(t1) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;
-- Join Hints for shuffle hash join
SELECT /*+ SHUFFLE_HASH(t1) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;
-- Join Hints for shuffle-and-replicate nested loop join
SELECT /*+ SHUFFLE_REPLICATE_NL(t1) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;
-- When different join strategy hints are specified on both sides of a join, Spark
-- prioritizes the BROADCAST hint over the MERGE hint over the SHUFFLE_HASH hint
-- over the SHUFFLE_REPLICATE_NL hint.
-- Spark will issue Warning in the following example
-- org.apache.spark.sql.catalyst.analysis.HintErrorLogger: Hint (strategy=merge)
-- is overridden by another hint and will not take effect.
SELECT /*+ BROADCAST(t1), MERGE(t1, t2) */ * FROM t1 INNER JOIN t2 ON t1.key = t2.key;
OFFSET integer_expression;
eg:
CREATE TABLE person (name STRING, age INT);
INSERT INTO person VALUES
('Zen Hui', 25),
('Anil B', 18),
('Shone S', 16),
('Mike A', 25),
('John A', 18),
('Jack N', 16);
-- Skip the first two rows.
SELECT name, age FROM person ORDER BY name OFFSET 2;
+-------+---+
| name|age|
+-------+---+
| John A| 18|
| Mike A| 25|
|Shone S| 16|
|Zen Hui| 25|
+-------+---+
-- Skip the first two rows and returns the next three rows.
SELECT name, age FROM person ORDER BY name LIMIT 3 OFFSET 2;
+-------+---+
| name|age|
+-------+---+
| John A| 18|
| Mike A| 25|
|Shone S| 16|
+-------+---+
-- A function expression as an input to OFFSET.
SELECT name, age FROM person ORDER BY name OFFSET length('SPARK');
+-------+---+
| name|age|
+-------+---+
|Zen Hui| 25|
+-------+---+
OFFSET integer_expression;
eg:
CREATE TABLE person (name STRING, age INT);
INSERT INTO person VALUES
('Zen Hui', 25),
('Anil B', 18),
('Shone S', 16),
('Mike A', 25),
('John A', 18),
('Jack N', 16);
-- Skip the first two rows.
SELECT name, age FROM person ORDER BY name OFFSET 2;
+-------+---+
| name|age|
+-------+---+
| John A| 18|
| Mike A| 25|
|Shone S| 16|
|Zen Hui| 25|
+-------+---+
-- Skip the first two rows and returns the next three rows.
SELECT name, age FROM person ORDER BY name LIMIT 3 OFFSET 2;
+-------+---+
| name|age|
+-------+---+
| John A| 18|
| Mike A| 25|
|Shone S| 16|
+-------+---+
-- A function expression as an input to OFFSET.
SELECT name, age FROM person ORDER BY name OFFSET length('SPARK');
+-------+---+
| name|age|
+-------+---+
|Zen Hui| 25|
+-------+---+