解决DuckDB不兼容PostgreSQL语法的几个问题

在微信公众号上看到一个PostgreSQL测试脚本,想在DuckDB上做同样的测试,结果遇到不兼容PostgreSQL语法的问题。
在张泽鹏先生的帮助下,逐个都得到了解决,修改后的脚本顺利执行。

  1. 序列号问题
CREATE TABLE customers (
  customer_id SERIAL PRIMARY KEY,
  name TEXT,
  region TEXT,
  join_date TIMESTAMP
);

DuckDB不支持SERIAL写法,需要先创建序列,然后在建表语句中设置默认值为nextval(),SQL语句改写为:

CREATE SEQUENCE id_sequence;
CREATE TABLE customers (
  customer_id int PRIMARY KEY DEFAULT nextval('id_sequence'),
  name TEXT,
  region TEXT,
  join_date TIMESTAMP
);

DuckDB虽然有rowid伪列,但它是数据插入表后自动产生的,不能用在建表语句。

  1. 没有存储过程语句问题
DO $$
DECLARE
  b int;
BEGIN
  FOR b IN 1..10 LOOP
    INSERT INTO customers (name, region, join_date)
    SELECT
      'Customer_' || (gs + (b-1)*100000),
      CASE WHEN random() < 0.33 THEN 'APAC'
           WHEN random() < 0.66 THEN 'EMEA'
           ELSE 'AMER' END,
      now() - make_interval(days => floor(random() * 365)::int)
    FROM generate_series(1,100000) AS gs;
    RAISE NOTICE 'customers batch % done', b;
  END LOOP;
END $$;

有两种解决办法,第一种比较笨拙,先用set variable定义一个变量b,再在插入语句中用getvariable(‘b’)读取它,再重新设置此变量的值,再次执行相同的语句,因为DuckDB也没有重复执行钢材执行的语句的简单办法,所以还要复制整个语句,循环次数稍大就很不方便甚至不可行。
第二种是用generate_series再造一个虚表b与原有gs笛卡尔积,这种方法要注意新增b表和原gs表的先后顺序问题,经过实验,b表放在gs表后面才符合原有循环的语义,即先对b=1的所有值编customer_id号,再依次对b=2、3、4…做相同的操作。

  1. 缺少make_interval函数问题

张先生告知可以用printf('%.0f days', random() * 365)::interval代替make_interval(days => floor(random() * 365)::int),修改这两个问题后的语句是:

INSERT INTO customers (name, region, join_date)
SELECT
  'Customer_' || (gs + (b-1)*100000),
  CASE WHEN random() < 0.33 THEN 'APAC'
       WHEN random() < 0.66 THEN 'EMEA'
       ELSE 'AMER' END,
  now() - printf('%.0f days', random() * 365)::interval
FROM generate_series(1,100000) AS gs(gs),generate_series(1,10)b(b) ;
  1. 运行

原有脚本在postgresql中的执行结果是:

postgres=# \timing on
启用计时功能.
postgres=# CREATE TABLE customers (
postgres(#   customer_id SERIAL PRIMARY KEY,
postgres(#   name TEXT,
postgres(#   region TEXT,
postgres(#   join_date TIMESTAMP
postgres(# );
CREATE TABLE
时间:53.285 ms
postgres=# \i /shujv/par/test_pg.sql
DROP TABLE
时间:17.512 ms
psql:/shujv/par/test_pg.sql:5: 注意:  表 "products" 不存在
DROP TABLE
时间:0.585 ms
psql:/shujv/par/test_pg.sql:6: 注意:  表 "orders" 不存在
DROP TABLE
时间:0.052 ms
CREATE TABLE
时间:30.826 ms
CREATE TABLE
时间:30.188 ms
CREATE TABLE
时间:21.031 ms
psql:/shujv/par/test_pg.sql:51: 注意:  customers batch 1 done
psql:/shujv/par/test_pg.sql:51: 注意:  customers batch 2 done
psql:/shujv/par/test_pg.sql:51: 注意:  customers batch 3 done
psql:/shujv/par/test_pg.sql:51: 注意:  customers batch 4 done
psql:/shujv/par/test_pg.sql:51: 注意:  customers batch 5 done
psql:/shujv/par/test_pg.sql:51: 注意:  customers batch 6 done
psql:/shujv/par/test_pg.sql:51: 注意:  customers batch 7 done
psql:/shujv/par/test_pg.sql:51: 注意:  customers batch 8 done
psql:/shujv/par/test_pg.sql:51: 注意:  customers batch 9 done
psql:/shujv/par/test_pg.sql:51: 注意:  customers batch 10 done
DO
时间:7991.844 ms
psql:/shujv/par/test_pg.sql:69: 注意:  products batch 1 done
psql:/shujv/par/test_pg.sql:69: 注意:  products batch 2 done
psql:/shujv/par/test_pg.sql:69: 注意:  products batch 3 done
psql:/shujv/par/test_pg.sql:69: 注意:  products batch 4 done
psql:/shujv/par/test_pg.sql:69: 注意:  products batch 5 done
psql:/shujv/par/test_pg.sql:69: 注意:  products batch 6 done
psql:/shujv/par/test_pg.sql:69: 注意:  products batch 7 done
psql:/shujv/par/test_pg.sql:69: 注意:  products batch 8 done
psql:/shujv/par/test_pg.sql:69: 注意:  products batch 9 done
psql:/shujv/par/test_pg.sql:69: 注意:  products batch 10 done
DO
时间:9554.327 ms
psql:/shujv/par/test_pg.sql:87: 注意:  orders batch 1 done
psql:/shujv/par/test_pg.sql:87: 注意:  orders batch 2 done
psql:/shujv/par/test_pg.sql:87: 注意:  orders batch 3 done
psql:/shujv/par/test_pg.sql:87: 注意:  orders batch 4 done
psql:/shujv/par/test_pg.sql:87: 注意:  orders batch 5 done
psql:/shujv/par/test_pg.sql:87: 注意:  orders batch 6 done
psql:/shujv/par/test_pg.sql:87: 注意:  orders batch 7 done
psql:/shujv/par/test_pg.sql:87: 注意:  orders batch 8 done
psql:/shujv/par/test_pg.sql:87: 注意:  orders batch 9 done
psql:/shujv/par/test_pg.sql:87: 注意:  orders batch 10 done
DO
时间:48662.112 ms
CREATE INDEX
时间:975.139 ms
CREATE INDEX
时间:878.807 ms
CREATE INDEX
时间:1005.441 ms
CREATE INDEX
时间:2478.417 ms
CREATE INDEX
时间:3616.895 ms
VACUUM
时间:2568.979 ms
                                                                      QUERY PLAN                                                                       
-------------------------------------------------------------------------------------------------------------------------------------------------------
 Limit  (cost=217238.93..217238.93 rows=1 width=128) (actual time=3609.369..3609.370 rows=3 loops=1)
   CTE recent_orders
     ->  Seq Scan on orders o  (cost=0.00..24853.00 rows=496098 width=26) (actual time=0.013..466.608 rows=493420 loops=1)
           Filter: (order_date > (now() - '180 days'::interval))
           Rows Removed by Filter: 506580
   CTE customer_region_sales
     ->  GroupAggregate  (cost=121862.84..126823.85 rows=3 width=25) (actual time=2147.117..2544.502 rows=3 loops=1)
           Group Key: c.region
           ->  Sort  (cost=121862.84..123103.08 rows=496098 width=25) (actual time=2039.495..2218.204 rows=493420 loops=1)
                 Sort Key: c.region
                 Sort Method: external merge  Disk: 12528kB
                 ->  Hash Join  (cost=35619.00..63059.31 rows=496098 width=25) (actual time=404.332..1505.222 rows=493420 loops=1)
                       Hash Cond: (r.customer_id = c.customer_id)
                       ->  CTE Scan on recent_orders r  (cost=0.00..9921.96 rows=496098 width=20) (actual time=0.015..702.330 rows=493420 loops=1)
                       ->  Hash  (cost=18236.00..18236.00 rows=1000000 width=9) (actual time=403.086..403.086 rows=1000000 loops=1)
                             Buckets: 131072  Batches: 16  Memory Usage: 3716kB
                             ->  Seq Scan on customers c  (cost=0.00..18236.00 rows=1000000 width=9) (actual time=0.003..180.026 rows=1000000 loops=1)
   CTE top_products
     ->  HashAggregate  (cost=65561.80..65561.84 rows=3 width=26) (actual time=1064.793..1064.794 rows=3 loops=1)
           Group Key: p.category
           ->  Hash Join  (cost=35641.00..63081.31 rows=496098 width=26) (actual time=387.022..885.881 rows=493420 loops=1)
                 Hash Cond: (r_1.product_id = p.product_id)
                 ->  CTE Scan on recent_orders r_1  (cost=0.00..9921.96 rows=496098 width=20) (actual time=0.024..100.262 rows=493420 loops=1)
                 ->  Hash  (cost=18258.00..18258.00 rows=1000000 width=14) (actual time=386.110..386.110 rows=1000000 loops=1)
                       Buckets: 131072  Batches: 16  Memory Usage: 3936kB
                       ->  Seq Scan on products p  (cost=0.00..18258.00 rows=1000000 width=14) (actual time=0.017..162.907 rows=1000000 loops=1)
   InitPlan 4 (returns $3)
     ->  Aggregate  (cost=0.07..0.08 rows=1 width=32) (actual time=397.409..397.410 rows=1 loops=1)
           ->  CTE Scan on customer_region_sales  (cost=0.00..0.06 rows=3 width=32) (actual time=0.001..397.395 rows=3 loops=1)
   ->  Sort  (cost=0.16..0.17 rows=1 width=128) (actual time=3609.368..3609.369 rows=3 loops=1)
         Sort Key: (((cr.total_sales / tp.total_sales))::numeric(10,2)) DESC
         Sort Method: quicksort  Memory: 25kB
         ->  Nested Loop  (cost=0.00..0.15 rows=1 width=128) (actual time=3609.352..3609.360 rows=3 loops=1)
               ->  CTE Scan on customer_region_sales cr  (cost=0.00..0.07 rows=1 width=64) (actual time=2544.542..2544.543 rows=1 loops=1)
                     Filter: (total_sales > $3)
                     Rows Removed by Filter: 2
               ->  CTE Scan on top_products tp  (cost=0.00..0.07 rows=1 width=64) (actual time=1064.802..1064.805 rows=3 loops=1)
                     Filter: (total_sales > '0'::numeric)
 Planning time: 0.811 ms
 Execution time: 3615.279 ms
(40 行记录)

时间:3623.223 ms
postgres=# 

修改后的脚本在DuckDB中执行结果

D .timer on
D .read test_pg.txt
Run Time (s): real 0.003 user 0.000000 sys 0.000000
Run Time (s): real 0.000 user 0.004000 sys 0.000000
Run Time (s): real 0.001 user 0.000000 sys 0.000000
Run Time (s): real 0.054 user 0.000000 sys 0.000000
Run Time (s): real 0.042 user 0.000000 sys 0.000000
Run Time (s): real 0.041 user 0.000000 sys 0.000000
Run Time (s): real 0.050 user 0.004000 sys 0.000000
Run Time (s): real 0.041 user 0.000000 sys 0.000000
Run Time (s): real 0.042 user 0.004000 sys 0.000000
100% ▕██████████████████████████████████████▏ (00:00:03.03 elapsed)     
Run Time (s): real 3.800 user 3.436000 sys 0.056000
Run Time (s): real 2.409 user 1.804000 sys 0.076000
100% ▕██████████████████████████████████████▏ (00:00:06.41 elapsed)     
Run Time (s): real 10.242 user 15.816000 sys 0.140000
Run Time (s): real 0.908 user 1.260000 sys 0.056000
Run Time (s): real 0.926 user 1.220000 sys 0.016000
Run Time (s): real 0.616 user 1.096000 sys 0.048000
Run Time (s): real 0.759 user 1.096000 sys 0.028000
Run Time (s): real 1.095 user 1.092000 sys 0.052000
Run Time (s): real 0.000 user 0.000000 sys 0.000000
┌─────────────────────────────────────┐
│┌───────────────────────────────────┐│
││    Query Profiling Information    ││
│└───────────────────────────────────┘│
└─────────────────────────────────────┘
EXPLAIN ANALYZE WITH recent_orders AS (   SELECT o.order_id, o.customer_id, o.product_id, o.amount, o.order_date   FROM orders o   WHERE o.order_date > now() - interval '180 days' ), customer_region_sales AS (   SELECT     c.region,     SUM(r.amount) AS total_sales,     COUNT(DISTINCT r.customer_id) AS unique_customers   FROM recent_orders r   JOIN customers c ON r.customer_id = c.customer_id   GROUP BY c.region ), top_products AS (   SELECT     p.category,     SUM(r.amount) AS total_sales   FROM recent_orders r   JOIN products p ON r.product_id = p.product_id   GROUP BY p.category ) SELECT   cr.region,   tp.category,   cr.total_sales AS region_sales,   tp.total_sales AS category_sales,   (cr.total_sales / tp.total_sales)::numeric(10,2) AS ratio FROM customer_region_sales cr JOIN top_products tp ON tp.total_sales > 0 WHERE cr.total_sales > (   SELECT AVG(total_sales) FROM customer_region_sales ) ORDER BY ratio DESC LIMIT 20;
┌────────────────────────────────────────────────┐
│┌──────────────────────────────────────────────┐│
││              Total Time: 0.209s              ││
│└──────────────────────────────────────────────┘│
└────────────────────────────────────────────────┘
┌───────────────────────────┐
│           QUERY           │
└─────────────┬─────────────┘
┌─────────────┴─────────────┐
│      EXPLAIN_ANALYZE      │
│    ────────────────────   │
│           0 rows          │
│          (0.00s)          │
└─────────────┬─────────────┘
┌─────────────┴─────────────┐
│            CTE            │
│    ────────────────────   │
│         CTE Name:         │
│       recent_orders       │
│                           ├──────────────┐
│       Table Index: 0      │              │
│                           │              │
│           0 rows          │              │
│          (0.01s)          │              │
└─────────────┬─────────────┘              │
┌─────────────┴─────────────┐┌─────────────┴─────────────┐
│         PROJECTION        ││            CTE            │
│    ────────────────────   ││    ────────────────────   │
│          order_id         ││         CTE Name:         │
│        customer_id        ││   customer_region_sales   │
│         product_id        ││                           │
│           amount          ││       Table Index: 8      ├───────────────────────────────────────────┐
│         order_date        ││                           │                                           │
│                           ││                           │                                           │
│        491,040 rows       ││           0 rows          │                                           │
│          (0.00s)          ││          (0.00s)          │                                           │
└─────────────┬─────────────┘└─────────────┬─────────────┘                                           │
┌─────────────┴─────────────┐┌─────────────┴─────────────┐                             ┌─────────────┴─────────────┐
│         TABLE_SCAN        ││         PROJECTION        │                             │           TOP_N           │
│    ────────────────────   ││    ────────────────────   │                             │    ────────────────────   │
│       Table: orders       ││__internal_decompress_strin│                             │          Top: 20          │
│   Type: Sequential Scan   ││           g(#0)           │                             │                           │
│                           ││             #1            │                             │         Order By:         │
│        Projections:       ││             #2            │                             │ CAST((cr.total_sales / tp │
│         order_date        ││                           │                             │  .total_sales) AS DECIMAL │
│          order_id         ││                           │                             │        (10,2)) DESC       │
│        customer_id        ││                           │                             │                           │
│         product_id        ││                           │                             │                           │
│           amount          ││                           │                             │                           │
│                           ││                           │                             │                           │
│          Filters:         ││                           │                             │                           │
│    (CAST(order_date AS    ││                           │                             │                           │
│  TIMESTAMP WITH TIME ZONE)││                           │                             │                           │
│   > '2025-05-09 00:33:09  ││                           │                             │                           │
│   .477814+00'::TIMESTAMP  ││                           │                             │                           │
│       WITH TIME ZONE)     ││                           │                             │                           │
│                           ││                           │                             │                           │
│        491,040 rows       ││           3 rows          │                             │           3 rows          │
│          (0.35s)          ││          (0.00s)          │                             │          (0.00s)          │
└───────────────────────────┘└─────────────┬─────────────┘                             └─────────────┬─────────────┘
                             ┌─────────────┴─────────────┐                             ┌─────────────┴─────────────┐
                             │       HASH_GROUP_BY       │                             │         PROJECTION        │
                             │    ────────────────────   │                             │    ────────────────────   │
                             │         Groups: #0        │                             │           region          │
                             │                           │                             │          category         │
                             │        Aggregates:        │                             │        region_sales       │
                             │          sum(#1)          │                             │       category_sales      │count(DISTINCT #2)    │                             │           ratio           │
                             │                           │                             │                           │
                             │           3 rows          │                             │           3 rows          │
                             │          (0.06s)          │                             │          (0.00s)          │
                             └─────────────┬─────────────┘                             └─────────────┬─────────────┘
                             ┌─────────────┴─────────────┐                             ┌─────────────┴─────────────┐
                             │         PROJECTION        │                             │       CROSS_PRODUCT       │
                             │    ────────────────────   │                             │    ────────────────────   │
                             │           region          │                             │                           │
                             │           amount          │                             │                           │
                             │        customer_id        │                             │                           ├───────────────────────────────────────────┐
                             │                           │                             │                           │                                           │
                             │        491,040 rows       │                             │           3 rows          │                                           │
                             │          (0.00s)          │                             │          (0.00s)          │                                           │
                             └─────────────┬─────────────┘                             └─────────────┬─────────────┘                                           │
                             ┌─────────────┴─────────────┐                             ┌─────────────┴─────────────┐                             ┌─────────────┴─────────────┐
                             │         PROJECTION        │                             │           FILTER          │                             │      NESTED_LOOP_JOIN     │
                             │    ────────────────────   │                             │    ────────────────────   │                             │    ────────────────────   │
                             │__internal_compress_string_│                             │    (total_sales > 0.00)   │                             │      Join Type: INNER     │
                             │        ubigint(#0)        │                             │                           │                             │                           │#1            │                             │                           │                             │        Conditions:        │#2            │                             │                           │                             │ CAST(total_sales AS DOUBLE├──────────────┐
                             │                           │                             │                           │                             │        ) > SUBQUERY       │              │
                             │                           │                             │                           │                             │                           │              │
                             │        491,040 rows       │                             │           3 rows          │                             │           1 row           │              │
                             │          (0.00s)          │                             │          (0.00s)          │                             │          (0.01s)          │              │
                             └─────────────┬─────────────┘                             └─────────────┬─────────────┘                             └─────────────┬─────────────┘              │
                             ┌─────────────┴─────────────┐                             ┌─────────────┴─────────────┐                             ┌─────────────┴─────────────┐┌─────────────┴─────────────┐
                             │         HASH_JOIN         │                             │         PROJECTION        │                             │          CTE_SCAN         ││         PROJECTION        │
                             │    ────────────────────   │                             │    ────────────────────   │                             │    ────────────────────   ││    ────────────────────   │
                             │      Join Type: INNER     │                             │__internal_decompress_strin│                             │        CTE Index: 8       ││ CASE  WHEN ((#1 > 1)) THEN│
                             │                           │                             │           g(#0)           │                             │                           ││   ("error"('More than one │
                             │        Conditions:        │                             │             #1            │                             │                           ││      row returned by a    │
                             │ customer_id = customer_id │                             │                           │                             │                           ││     subquery used as an   │
                             │                           │                             │                           │                             │                           ││     expression - scalar   │
                             │                           │                             │                           │                             │                           ││     subqueries can only   │
                             │                           │                             │                           │                             │                           ││    return a single row.   │
                             │                           ├──────────────┐              │                           │                             │                           ││          Use "SET         │
                             │                           │              │              │                           │                             │                           ││ scalar_subquery_error_on_m│
                             │                           │              │              │                           │                             │                           ││   ultiple_rows=false" to  │
                             │                           │              │              │                           │                             │                           ││     revert to previous    │
                             │                           │              │              │                           │                             │                           ││   behavior of returning a │
                             │                           │              │              │                           │                             │                           ││ random row.')) ELSE #0 END│
                             │                           │              │              │                           │                             │                           ││                           │
                             │        491,040 rows       │              │              │           3 rows          │                             │           3 rows          ││           1 row           │
                             │          (0.12s)          │              │              │          (0.00s)          │                             │          (0.00s)          ││          (0.00s)          │
                             └─────────────┬─────────────┘              │              └─────────────┬─────────────┘                             └───────────────────────────┘└─────────────┬─────────────┘
                             ┌─────────────┴─────────────┐┌─────────────┴─────────────┐┌─────────────┴─────────────┐                                                          ┌─────────────┴─────────────┐
                             │         TABLE_SCAN        ││          CTE_SCAN         ││       HASH_GROUP_BY       │                                                          │    UNGROUPED_AGGREGATE    │
                             │    ────────────────────   ││    ────────────────────   ││    ────────────────────   │                                                          │    ────────────────────   │
                             │      Table: customers     ││        CTE Index: 0       ││         Groups: #0        │                                                          │        Aggregates:        │Type: Sequential Scan   ││                           ││    Aggregates: sum(#1)    │                                                          │        "first"(#0)        │
                             │                           ││                           ││                           │                                                          │        count_star()       │
                             │                           ││                           ││                           │                                                          │                           │
                             │        999,996 rows       ││        491,040 rows       ││           3 rows          │                                                          │           1 row           │
                             │          (0.01s)          ││          (0.00s)          ││          (0.07s)          │                                                          │          (0.00s)          │
                             └───────────────────────────┘└───────────────────────────┘└─────────────┬─────────────┘                                                          └─────────────┬─────────────┘
                                                                                       ┌─────────────┴─────────────┐                                                          ┌─────────────┴─────────────┐
                                                                                       │         PROJECTION        │                                                          │         PROJECTION        │
                                                                                       │    ────────────────────   │                                                          │    ────────────────────   │
                                                                                       │          category         │                                                          │             #0            │
                                                                                       │           amount          │                                                          │                           │
                                                                                       │                           │                                                          │                           │
                                                                                       │        491,040 rows       │                                                          │           1 row           │
                                                                                       │          (0.00s)          │                                                          │          (0.00s)          │
                                                                                       └─────────────┬─────────────┘                                                          └─────────────┬─────────────┘
                                                                                       ┌─────────────┴─────────────┐                                                          ┌─────────────┴─────────────┐
                                                                                       │         PROJECTION        │                                                          │    UNGROUPED_AGGREGATE    │
                                                                                       │    ────────────────────   │                                                          │    ────────────────────   │
                                                                                       │__internal_compress_string_│                                                          │    Aggregates: avg(#0)    │
                                                                                       │        uhugeint(#0)       │                                                          │                           │#1            │                                                          │                           │
                                                                                       │                           │                                                          │                           │
                                                                                       │        491,040 rows       │                                                          │           1 row           │
                                                                                       │          (0.00s)          │                                                          │          (0.00s)          │
                                                                                       └─────────────┬─────────────┘                                                          └─────────────┬─────────────┘
                                                                                       ┌─────────────┴─────────────┐                                                          ┌─────────────┴─────────────┐
                                                                                       │         HASH_JOIN         │                                                          │         PROJECTION        │
                                                                                       │    ────────────────────   │                                                          │    ────────────────────   │
                                                                                       │      Join Type: INNER     │                                                          │        total_sales        │
                                                                                       │                           │                                                          │                           │
                                                                                       │        Conditions:        ├──────────────┐                                           │                           │
                                                                                       │  product_id = product_id  │              │                                           │                           │
                                                                                       │                           │              │                                           │                           │
                                                                                       │        491,040 rows       │              │                                           │           3 rows          │
                                                                                       │          (0.17s)          │              │                                           │          (0.00s)          │
                                                                                       └─────────────┬─────────────┘              │                                           └─────────────┬─────────────┘
                                                                                       ┌─────────────┴─────────────┐┌─────────────┴─────────────┐                             ┌─────────────┴─────────────┐
                                                                                       │         TABLE_SCAN        ││          CTE_SCAN         │                             │          CTE_SCAN         │
                                                                                       │    ────────────────────   ││    ────────────────────   │                             │    ────────────────────   │
                                                                                       │      Table: products      ││        CTE Index: 0       │                             │        CTE Index: 8       │
                                                                                       │   Type: Sequential Scan   ││                           │                             │                           │
                                                                                       │                           ││                           │                             │                           │
                                                                                       │        999,998 rows       ││        491,040 rows       │                             │           3 rows          │
                                                                                       │          (0.00s)          ││          (0.00s)          │                             │          (0.00s)          │
                                                                                       └───────────────────────────┘└───────────────────────────┘                             └───────────────────────────┘
Run Time (s): real 0.212 user 0.684000 sys 0.016000
D 

附:修改后脚本

-- ========================================
--  Step 1. 创建基础表结构
-- ========================================
DROP TABLE IF EXISTS customers CASCADE;
DROP TABLE IF EXISTS products CASCADE;
DROP TABLE IF EXISTS orders CASCADE;
CREATE SEQUENCE id_sequence;
CREATE SEQUENCE id_sequence2;
CREATE SEQUENCE id_sequence3;
CREATE TABLE customers (
  customer_id int PRIMARY KEY DEFAULT nextval('id_sequence'),
  name TEXT,
  region TEXT,
  join_date TIMESTAMP
);

CREATE TABLE products (
  product_id int PRIMARY KEY DEFAULT nextval('id_sequence2'),
  name TEXT,
  category TEXT,
  price NUMERIC(10,2)
);

CREATE TABLE orders (
  order_id int PRIMARY KEY DEFAULT nextval('id_sequence3'),
  customer_id INT REFERENCES customers(customer_id),
  product_id INT REFERENCES products(product_id),
  quantity INT,
  amount NUMERIC(12,2),
  order_date TIMESTAMP
);

-- ========================================
--  Step 2. 批量插入测试数据
-- ========================================

-- 2.1 插入 customers(100万)
INSERT INTO customers (name, region, join_date)
SELECT
  'Customer_' || (gs + (b-1)*100000),
  CASE WHEN random() < 0.33 THEN 'APAC'
       WHEN random() < 0.66 THEN 'EMEA'
       ELSE 'AMER' END,
  now() - printf('%.0f days', random() * 365)::interval
FROM generate_series(1,100000) AS gs(gs),generate_series(1,10)b(b) ;

-- 2.2 插入 products(100万)
INSERT INTO products (name, category, price)
SELECT
  'Product_' || (gs + (b-1)*100000),
  CASE WHEN random() < 0.5 THEN 'Electronics'
       WHEN random() < 0.8 THEN 'Clothing'
       ELSE 'Food' END,
  round((random() * 500 + 1)::numeric, 2)
FROM generate_series(1,100000) AS gs(gs),generate_series(1,10)b(b) ;

-- 2.3 插入 orders(100万)
INSERT INTO orders (customer_id, product_id, quantity, amount, order_date)
SELECT
  floor(random() * 1000000 + 1)::int,
  floor(random() * 1000000 + 1)::int,
  floor(random() * 5 + 1)::int,
  round(((floor(random()*5)+1) * (random()*500 + 1))::numeric, 2),
  now() - printf('%.0f days', random() * 365)::interval
FROM generate_series(1,100000) AS gs(gs),generate_series(1,10)b(b) ;

-- ========================================
--  Step 3. 建立索引
-- ========================================
CREATE INDEX idx_orders_cust ON orders(customer_id);
CREATE INDEX idx_orders_prod ON orders(product_id);
CREATE INDEX idx_orders_date ON orders(order_date);
CREATE INDEX idx_customers_region ON customers(region);
CREATE INDEX idx_products_cat ON products(category);

VACUUM ANALYZE;

-- ========================================
--  Step 4. 构造复杂查询(含 CTE + 子查询 + 聚合 + JOIN)
-- ========================================

EXPLAIN ANALYZE
WITH recent_orders AS (
  SELECT o.order_id, o.customer_id, o.product_id, o.amount, o.order_date
  FROM orders o
  WHERE o.order_date > now() - interval '180 days'
),
customer_region_sales AS (
  SELECT
    c.region,
    SUM(r.amount) AS total_sales,
    COUNT(DISTINCT r.customer_id) AS unique_customers
  FROM recent_orders r
  JOIN customers c ON r.customer_id = c.customer_id
  GROUP BY c.region
),
top_products AS (
  SELECT
    p.category,
    SUM(r.amount) AS total_sales
  FROM recent_orders r
  JOIN products p ON r.product_id = p.product_id
  GROUP BY p.category
)
SELECT
  cr.region,
  tp.category,
  cr.total_sales AS region_sales,
  tp.total_sales AS category_sales,
  (cr.total_sales / tp.total_sales)::numeric(10,2) AS ratio
FROM customer_region_sales cr
JOIN top_products tp ON tp.total_sales > 0
WHERE cr.total_sales > (
  SELECT AVG(total_sales) FROM customer_region_sales
)
ORDER BY ratio DESC
LIMIT 20;
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值