先反范式化:
-- look at the test data
SELECT * FROM TESTDB.TEST_MERGE ORDER BY pk, val;
*** Query completed. 10 rows found. 2 columns returned.
*** Total elapsed time was 1 second.
pk val
----------- ---
1 a
1 b
1 c
2 d
2 e
2 f
2 g
3 h
4 i
4 j
+---------+---------+---------+---------+---------+---------+---------+----
-- add iterative row id info
REPLACE VIEW TESTDB.TEST_MERGE_V AS
SELECT a.* , ROW_NUMBER() OVER ( PARTITION BY a.pk ORDER BY a.val ) AS ind
ice
FROM TESTDB.TEST_MERGE a
;
*** View has been replaced.
*** Total elapsed time was 1 second.
+---------+---------+---------+---------+---------+---------+---------+----
-- look at the test data with row id
SELECT * FROM TESTDB.TEST_MERGE_V;
*** Query completed. 10 rows found. 3 columns returned.
*** Total elapsed time was 1 second.
pk val indice
----------- --- -----------
1 a 1
1 b 2
1 c 3
2 d 1
2 e 2
2 f 3
2 g 4
3 h 1
4 i 1
4 j 2
+---------+---------+---------+---------+---------+---------+---------+----
-- denormalization process, the below can be embeded into insert clause
WITH RECURSIVE merged( p , v , ind ) AS
(
SELECT pk, CAST( val AS VARCHAR(100) ), indice
FROM TESTDB.TEST_MERGE_V
WHERE indice = 1
UNION ALL
SELECT b.pk, a.v || ',' || b.val, a.ind + 1
FROM merged a INNER JOIN TESTDB.TEST_MERGE_V b
ON a.p = b.pk AND a.ind + 1= b.indice
)
SELECT * FROM merged
QUALIFY ROW_NUMBER() OVER( PARTITION BY p ORDER BY v DESC) = 1
;
*** Query completed. 4 rows found. 3 columns returned.
*** Total elapsed time was 1 second.
p v
----------- ---------------------------------------------------------------
1 a,b,c
2 d,e,f,g
3 h
4 i,j
+---------+---------+---------+---------+---------+---------+---------+----
-- drop the view
DROP VIEW TESTDB.TEST_MERGE_V;
*** View has been dropped.
*** Total elapsed time was 1 second.
================================================================
再来个范式化:
WITH RECURSIVE split( pk, c, str_rest, depth) AS
(
SELECT
pk
, CASE POSITION(',' IN v)
WHEN 0 THEN SUBSTRING(v FROM 1 )
ELSE SUBSTRING(v FROM 1 FOR POSITION( ',' IN v) - 1)
END AS c
, SUBSTRING(v FROM POSITION(',' IN v) + 1) || ',' AS str_rest
, 1
FROM TESTDB.MERGED
UNION ALL
SELECT
pk
, SUBSTRING(str_rest FROM 1 FOR POSITION( ',' IN str_rest) - 1) AS c
, SUBSTRING(str_rest FROM POSITION(',' IN str_rest) + 1) AS str_rest
, depth + 1
FROM split
WHERE POSITION(',' IN str_rest) <> 0
)
SELECT pk, TRIM(c) FROM split WHERE TRIM(c) <> '' GROUP BY 1, 2 ORDER BY 1, 2
;
================================================================
分析:
在反规范化中
1. 需要引入循环变量indice。
2. 由于在递归查询的递归语句部分中不能使用OLAP函数,可以用view来做一个间接的方案。
3. 主要UNION ALL后的每个列的数据类型是和UNION的第一个语句的类型相同,所以那个字符串要类型转换成100,不然拼接后的长度太长会被截断。
在规范化中
1. 要学会使用字符串处理函数,并且注意循环/递归的退出条件。