发现github上22个pristo查询tpch语句不能直接运行,自己转化后可以直接运行的如下:
1、
SELECT
l.returnflag,
l.linestatus,
sum(l.quantity) AS sum_qty,
sum(l.extendedprice) AS sum_base_price,
sum(l.extendedprice * (1 - l.discount)) AS sum_disc_price,
sum(l.extendedprice * (1 - l.discount) * (1 + l.tax)) AS sum_charge,
avg(l.quantity) AS avg_qty,
avg(l.extendedprice) AS avg_price,
avg(l.discount) AS avg_disc,
count(*) AS count_order
FROM
lineitem as l
WHERE
l.shipdate <= DATE '1998-12-01' - INTERVAL '90' DAY
GROUP BY
l.returnflag,
l.linestatus
ORDER BY
l.returnflag,
l.linestatus;
github上原本的原本语句如下,上述为我自己改写的,可以对比看看区别:
SELECT
l_returnflag,
l_linestatus,
sum(l_quantity) AS sum_qty,
sum(l_extendedprice) AS sum_base_price,
sum(l_extendedprice * (1 - l_discount)) AS sum_disc_price,
sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge,
avg(l_quantity) AS avg_qty,
avg(l_extendedprice) AS avg_price,
avg(l_discount) AS avg_disc,
count(*) AS count_order
FROM
lineitem
WHERE
l_shipdate <= DATE '1998-12-01' - INTERVAL '90' DAY
GROUP BY
l_returnflag,
l_linestatus
ORDER BY
l_returnflag,
l_linestatus
2、
SELECT
s.acctbal,
s.name,
n.name,
p.partkey,
p.mfgr,
s.address,
s.phone,
s.comment
FROM
part as p,
supplier as s,
partsupp as ps,
nation as n,
region as r
WHERE
p.partkey = ps.partkey
AND s.suppkey = ps.suppkey
AND p.size = 15
AND p.type LIKE '%BRASS'
AND s.nationkey = n.nationkey
AND n.regionkey = r.regionkey
AND r.name = 'EUROPE'
AND ps.supplycost = (
SELECT min(ps.supplycost)
FROM
partsupp as ps, supplier as s,
nation as n, region as r
WHERE
p.partkey = ps.partkey
AND s.suppkey = ps.suppkey
AND s.nationkey = n.nationkey
AND n.regionkey = r.regionkey
AND r.name = 'EUROPE'
)
ORDER BY
s.acctbal DESC,
n.name,
s.name,
p.partkey
LIMIT 100;
3、
SELECT
l.orderkey,
sum(l.extendedprice * (1 - l.discount)) AS revenue,
o.orderdate,
o.shippriority
FROM
customer as c,
orders as o,
lineitem as l
WHERE
c.mktsegment = 'BUILDING'
AND c.custkey = o.custkey
AND l.orderkey = o.orderkey
AND o.orderdate < DATE '1995-03-15'
AND l.shipdate > DATE '1995-03-15'
GROUP BY
l.orderkey,
o.orderdate,
o.shippriority
ORDER BY
revenue DESC,
o.orderdate
LIMIT 10;
4
SELECT
o.orderpriority,
count(*) AS order_count
FROM orders as o
WHERE
o.orderdate >= DATE '1993-07-01'
AND o.orderdate < DATE '1993-07-01' + INTERVAL '3' MONTH
AND EXISTS (
SELECT *
FROM lineitem as l
WHERE
l.orderkey = o.orderkey
AND l.commitdate < l.receiptdate
)
GROUP BY
o.orderpriority
ORDER BY
o.orderpriority;
5
SELECT
n.name,
sum(l.extendedprice * (1 - l.discount)) AS revenue
FROM
customer as c,
orders as o,
lineitem as l,
supplier as s,
nation as n,
region as r
WHERE
c.custkey = o.custkey
AND l.orderkey = o.orderkey
AND l.suppkey = s.suppkey
AND c.nationkey = s.nationkey
AND s.nationkey = n.nationkey
AND n.regionkey = r.regionkey
AND r.name = 'ASIA'
AND o.orderdate >= DATE '1994-01-01'
AND o.orderdate < DATE '1994-01-01' + INTERVAL '1' YEAR
GROUP BY
n.name
ORDER BY
revenue DESC;
6
SELECT sum(l.extendedprice * l.discount) AS revenue
FROM
lineitem as l
WHERE
l.shipdate >= DATE '1994-01-01'
AND l.shipdate < DATE '1994-01-01' + INTERVAL '1' YEAR
AND l.discount BETWEEN decimal '0.06' - decimal '0.01' AND decimal '0.06' + decimal '0.01'
AND l.quantity < 24;
7
SELECT
supp_nation,
cust_nation,
l_year,
sum(volume) AS revenue
FROM (
SELECT
n1.name AS supp_nation,
n2.name AS cust_nation,
extract(YEAR FROM l.shipdate) AS l_year,
l.extendedprice * (1 - l.discount) AS volume
FROM
supplier as s,
lineitem as l,
orders as o,
customer as c,
nation as n1,
nation as n2
WHERE
s.suppkey = l.suppkey
AND o.orderkey = l.orderkey
AND c.custkey = o.custkey
AND s.nationkey = n1.nationkey
AND c.nationkey = n2.nationkey
AND (
(n1.name = 'FRANCE' AND n2.name = 'GERMANY')
OR (n1.name = 'GERMANY' AND n2.name = 'FRANCE')
)
AND l.shipdate BETWEEN DATE '1995-01-01' AND DATE '1996-12-31'
) AS shipping
GROUP BY
supp_nation,
cust_nation,
l_year
ORDER BY
supp_nation,
cust_nation,
l_year;
原来:
SELECT
supp_nation,
cust_nation,
l_year,
sum(volume) AS revenue
FROM (
SELECT
n1.n_name AS supp_nation,
n2.n_name AS cust_nation,
extract(YEAR FROM l_shipdate) AS l_year,
l_extendedprice * (1 - l_discount) AS volume
FROM
supplier,
lineitem,
orders,
customer,
nation n1,
nation n2
WHERE
s_suppkey = l_suppkey
AND o_orderkey = l_orderkey
AND c_custkey = o_custkey
AND s_nationkey = n1.n_nationkey
AND c_nationkey = n2.n_nationkey
AND (
(n1.n_name = 'FRANCE' AND n2.n_name = 'GERMANY')
OR (n1.n_name = 'GERMANY' AND n2.n_name = 'FRANCE')
)
AND l_shipdate BETWEEN DATE '1995-01-01' AND DATE '1996-12-31'
) AS shipping
GROUP BY
supp_nation,
cust_nation,
l_year
ORDER BY
supp_nation,
cust_nation,
l_year
8
SELECT
o_year,
sum(CASE
WHEN nation = 'BRAZIL'
THEN volume
ELSE 0
END) / sum(volume) AS mkt_share
FROM (
SELECT
extract(YEAR FROM o.orderdate) AS o_year,
l.extendedprice * (1 - l.discount) AS volume,
n2.name AS nation
FROM
part as p,
supplier as s,
lineitem as l,
orders as o,
customer as c,
nation as n1,
nation as n2,
region as r
WHERE
p.partkey = l.partkey
AND s.suppkey = l.suppkey
AND l.orderkey = o.orderkey
AND o.custkey = c.custkey
AND c.nationkey = n1.nationkey
AND n1.regionkey = r.regionkey
AND r.name = 'AMERICA'
AND s.nationkey = n2.nationkey
AND o.orderdate BETWEEN DATE '1995-01-01' AND DATE '1996-12-31'
AND p.type = 'ECONOMY ANODIZED STEEL'
) AS all_nations
GROUP BY
o_year
ORDER BY
o_year;
原来:
SELECT
o_year,
sum(CASE
WHEN nation = 'BRAZIL'
THEN volume
ELSE 0
END) / sum(volume) AS mkt_share
FROM (
SELECT
extract(YEAR FROM o_orderdate) AS o_year,
l_extendedprice * (1 - l_discount) AS volume,
n2.n_name AS nation
FROM
part,
supplier,
lineitem,
orders,
customer,
nation n1,
nation n2,
region
WHERE
p_partkey = l_partkey
AND s_suppkey = l_suppkey
AND l_orderkey = o_orderkey
AND o_custkey = c_custkey
AND c_nationkey = n1.n_nationkey
AND n1.n_regionkey = r_regionkey
AND r_name = 'AMERICA'
AND s_nationkey = n2.n_nationkey
AND o_orderdate BETWEEN DATE '1995-01-01' AND DATE '1996-12-31'
AND p_type = 'ECONOMY ANODIZED STEEL'
) AS all_nations
GROUP BY
o_year
ORDER BY
o_year
9
SELECT
nation,
o_year,
sum(amount) AS sum_profit
FROM (
SELECT
n.name AS nation,
extract(YEAR FROM o.orderdate) AS o_year,
l.extendedprice * (1 - l.discount) - ps.supplycost * l.quantity AS amount
FROM
part as p,
supplier as s,
lineitem as l,
partsupp as ps,
orders as o,
nation as n
WHERE
s.suppkey = l.suppkey
AND ps.suppkey = l.suppkey
AND ps.partkey = l.partkey
AND p.partkey = l.partkey
AND o.orderkey = l.orderkey
AND s.nationkey = n.nationkey
AND p.name LIKE '%green%'
) AS profit
GROUP BY
nation,
o_year
ORDER BY
nation,
o_year DESC;
原来:
SELECT
nation,
o_year,
sum(amount) AS sum_profit
FROM (
SELECT
n_name AS nation,
extract(YEAR FROM o_orderdate) AS o_year,
l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity AS amount
FROM
part,
supplier,
lineitem,
partsupp,
orders,
nation
WHERE
s_suppkey = l_suppkey
AND ps_suppkey = l_suppkey
AND ps_partkey = l_partkey
AND p_partkey = l_partkey
AND o_orderkey = l_orderkey
AND s_nationkey = n_nationkey
AND p_name LIKE '%green%'
) AS profit
GROUP BY
nation,
o_year
ORDER BY
nation,
o_year DESC
10
SELECT
c.custkey,
c.name,
sum(l.extendedprice * (1 - l.discount)) AS revenue,
c.acctbal,
n.name,
c.address,
c.phone,
c.comment
FROM
customer as c,
orders as o,
lineitem as l,
nation as n
WHERE
c.custkey = o.custkey
AND l.orderkey = o.orderkey
AND o.orderdate >= DATE '1993-10-01'
AND o.orderdate < DATE '1993-10-01' + INTERVAL '3' MONTH
AND l.returnflag = 'R'
AND c.nationkey = n.nationkey
GROUP BY
c.custkey,
c.name,
c.acctbal,
c.phone,
n.name,
c.address,
c.comment
ORDER BY
revenue DESC
LIMIT 20;
11
SELECT
ps.partkey,
sum(ps.supplycost * ps.availqty) AS value
FROM
partsupp as ps,
supplier as s,
nation as n
WHERE
ps.suppkey = s.suppkey
AND s.nationkey = n.nationkey
AND n.name = 'GERMANY'
GROUP BY
ps.partkey
HAVING
sum(ps.supplycost * ps.availqty) > (
SELECT sum(ps.supplycost * ps.availqty) * 0.0001
FROM
partsupp as ps,
supplier as s,
nation as n
WHERE
ps.suppkey = s.suppkey
AND s.nationkey = n.nationkey
AND n.name = 'GERMANY'
)
ORDER BY
value DESC;
12
SELECT
l.shipmode,
sum(CASE
WHEN o.orderpriority = '1-URGENT'
OR o.orderpriority = '2-HIGH'
THEN 1
ELSE 0
END) AS high_line_count,
sum(CASE
WHEN o.orderpriority <> '1-URGENT'
AND o.orderpriority <> '2-HIGH'
THEN 1
ELSE 0
END) AS low_line_count
FROM
orders as o,
lineitem as l
WHERE
o.orderkey = l.orderkey
AND l.shipmode IN ('MAIL', 'SHIP')
AND l.commitdate < l.receiptdate
AND l.shipdate < l.commitdate
AND l.receiptdate >= DATE '1994-01-01'
AND l.receiptdate < DATE '1994-01-01' + INTERVAL '1' YEAR
GROUP BY
l.shipmode
ORDER BY
l.shipmode;
13
SELECT
c_count,
COUNT(*) AS custdist
FROM (
SELECT
c.custkey,
COUNT(o.orderkey) as c_count
FROM
customer as c
LEFT OUTER JOIN
orders as o
ON
c.custkey = o.custkey
AND o.comment NOT LIKE '%special%requests%'GROUP BY
c.custkey) AS t
GROUP BY
c_count
ORDER BY
custdist DESC,
c_count DESC;
原来:
SELECT
c_count,
count(*) AS custdist
FROM (
SELECT
c_custkey,
count(o_orderkey)
FROM
customer
LEFT OUTER JOIN orders ON
c_custkey = o_custkey
AND o_comment NOT LIKE '%special%requests%'
GROUP BY
c_custkey
) AS c_orders (c_custkey, c_count)
GROUP BY
c_count
ORDER BY
custdist DESC,
c_count DESC
网上解觉方案:
SELECT
c_count,
COUNT(*) AS custdist
FROM (
SELECT
c_custkey,
COUNT(o_orderkey) as c_count
FROM
test-187010.TPCH.customer
LEFT OUTER JOIN
orders
ON
c_custkey = o_custkey
AND o_comment NOT LIKE '%special%requests%'GROUP BY
c_custkey) AS c_orders
GROUP BY
c_count
ORDER BY
custdist DESC,
c_count DESC;
14
SELECT 100.00 * sum(CASE
WHEN p.type LIKE 'PROMO%'
THEN l.extendedprice * (1 - l.discount)
ELSE 0
END) / sum(l.extendedprice * (1 - l.discount)) AS promo_revenue
FROM
lineitem as l,
part as p
WHERE
l.partkey = p.partkey
AND l.shipdate >= DATE '1995-09-01'
AND l.shipdate < DATE '1995-09-01' + INTERVAL '1' MONTH
15
SELECT
s.suppkey,
s.name,
s.address,
s.phone,
r.total_revenue
FROM( SELECT
l.suppkey AS supplier_no,
sum(l.extendedprice * (1 - l.discount)) AS total_revenue
FROM
lineitem as l
WHERE
l.shipdate >= DATE '1996-01-01'
AND l.shipdate < DATE '1996-01-01' + INTERVAL '3' MONTH
GROUP BY
l.suppkey) as r,
supplier as s
WHERE
s.suppkey = r.supplier_no
AND r.total_revenue = (
SELECT max(r.total_revenue)
FROM
( SELECT
l.suppkey AS supplier_no,
sum(l.extendedprice * (1 - l.discount)) AS total_revenue
FROM
lineitem as l
WHERE
l.shipdate >= DATE '1996-01-01'
AND l.shipdate < DATE '1996-01-01' + INTERVAL '3' MONTH
GROUP BY
l.suppkey) as r
)
ORDER BY
s.suppkey;
原来:
CREATE OR REPLACE VIEW revenue AS
SELECT
l_suppkey AS supplier_no,
sum(l_extendedprice * (1 - l_discount)) AS total_revenue
FROM
lineitem
WHERE
l_shipdate >= DATE '1996-01-01'
AND l_shipdate < DATE '1996-01-01' + INTERVAL '3' MONTH
GROUP BY
l_suppkey;
SELECT
s_suppkey,
s_name,
s_address,
s_phone,
total_revenue
FROM
supplier,
revenue
WHERE
s_suppkey = supplier_no
AND total_revenue = (
SELECT max(total_revenue)
FROM
revenue
)
ORDER BY
s_suppkey;
16
SELECT
p.brand,
p.type,
p.size,
count(DISTINCT ps.suppkey) AS supplier_cnt
FROM
partsupp as ps,
part as p
WHERE
p.partkey = ps.partkey
AND p.brand <> 'Brand#45'
AND p.type NOT LIKE 'MEDIUM POLISHED%'
AND p.size IN (49, 14, 23, 45, 19, 3, 36, 9)
AND ps.suppkey NOT IN (
SELECT s.suppkey
FROM
supplier as s
WHERE
s.comment LIKE '%Customer%Complaints%'
)
GROUP BY
p.brand,
p.type,
p.size
ORDER BY
supplier_cnt DESC,
p.brand,
p.type,
p.size;
17
SELECT sum(l.extendedprice) / 7.0 AS avg_yearly
FROM
lineitem as l,
part as p
WHERE
p.partkey = l.partkey
AND p.brand = 'Brand#23'
AND p.container = 'MED BOX'
AND l.quantity < (
SELECT 0.2 * avg(l.quantity)
FROM
lineitem as l
WHERE
l.partkey = p.partkey
)
18
SELECT
c.name,
c.custkey,
o.orderkey,
o.orderdate,
o.totalprice,
sum(l.quantity)
FROM
customer as c,
orders as o,
lineitem as l
WHERE
o.orderkey IN (
SELECT l.orderkey
FROM
lineitem as l
GROUP BY
l.orderkey
HAVING
sum(l.quantity) > 300
)
AND c.custkey = o.custkey
AND o.orderkey = l.orderkey
GROUP BY
c.name,
c.custkey,
o.orderkey,
o.orderdate,
o.totalprice
ORDER BY
o.totalprice DESC,
o.orderdate
LIMIT 100;
19
SELECT sum(l.extendedprice * (1 - l.discount)) AS revenue
FROM
lineitem as l,
part as p
WHERE
(
p.partkey = l.partkey
AND p.brand = 'Brand#12'
AND p.container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
AND l.quantity >= 1 AND l.quantity <= 1 + 10
AND p.size BETWEEN 1 AND 5
AND l.shipmode IN ('AIR', 'AIR REG')
AND l.shipinstruct = 'DELIVER IN PERSON'
)
OR
(
p.partkey = l.partkey
AND p.brand = 'Brand#23'
AND p.container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
AND l.quantity >= 10 AND l.quantity <= 10 + 10
AND p.size BETWEEN 1 AND 10
AND l.shipmode IN ('AIR', 'AIR REG')
AND l.shipinstruct = 'DELIVER IN PERSON'
)
OR
(
p.partkey = l.partkey
AND p.brand = 'Brand#34'
AND p.container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
AND l.quantity >= 20 AND l.quantity <= 20 + 10
AND p.size BETWEEN 1 AND 15
AND l.shipmode IN ('AIR', 'AIR REG')
AND l.shipinstruct = 'DELIVER IN PERSON'
);
20
SELECT
s.name,
s.address
FROM
supplier as s, nation as n
WHERE
s.suppkey IN (
SELECT ps.suppkey
FROM
partsupp as ps
WHERE
ps.partkey IN (
SELECT p.partkey
FROM
part as p
WHERE
p.name LIKE 'forest%'
)
AND ps.availqty > (
SELECT 0.5 * sum(l.quantity)
FROM
lineitem as l
WHERE
l.partkey = ps.partkey
AND l.suppkey = ps.suppkey
AND l.shipdate >= date('1994-01-01')
AND l.shipdate < date('1994-01-01') + interval '1' YEAR
)
)
AND s.nationkey = n.nationkey
AND n.name = 'CANADA'
ORDER BY s.name;
21
SELECT
s.name,
count(*) AS numwait
FROM
supplier as s,
lineitem as l1,
orders as o,
nation as n
WHERE
s.suppkey = l1.suppkey
AND o.orderkey = l1.orderkey
AND o.orderstatus = 'F'
AND l1.receiptdate > l1.commitdate
AND exists(
SELECT *
FROM
lineitem as l2
WHERE
l2.orderkey = l1.orderkey
AND l2.suppkey <> l1.suppkey
)
AND NOT exists(
SELECT *
FROM
lineitem as l3
WHERE
l3.orderkey = l1.orderkey
AND l3.suppkey <> l1.suppkey
AND l3.receiptdate > l3.commitdate
)
AND s.nationkey = n.nationkey
AND n.name = 'SAUDI ARABIA'
GROUP BY
s.name
ORDER BY
numwait DESC,
s.name
LIMIT 100;
原来:
SELECT
s_name,
count(*) AS numwait
FROM
supplier,
lineitem l1,
orders,
nation
WHERE
s_suppkey = l1.l_suppkey
AND o_orderkey = l1.l_orderkey
AND o_orderstatus = 'F'
AND l1.l_receiptdate > l1.l_commitdate
AND exists(
SELECT *
FROM
lineitem l2
WHERE
l2.l_orderkey = l1.l_orderkey
AND l2.l_suppkey <> l1.l_suppkey
)
AND NOT exists(
SELECT *
FROM
lineitem l3
WHERE
l3.l_orderkey = l1.l_orderkey
AND l3.l_suppkey <> l1.l_suppkey
AND l3.l_receiptdate > l3.l_commitdate
)
AND s_nationkey = n_nationkey
AND n_name = 'SAUDI ARABIA'
GROUP BY
s_name
ORDER BY
numwait DESC,
s_name
LIMIT 100
22
SELECT
cntrycode,
count(*) AS numcust,
sum(c_acctbal) AS totacctbal
FROM (
SELECT
substr(c.phone, 1, 2) AS cntrycode,
c.acctbal as c_acctbal
FROM
customer as c
WHERE
substr(c.phone, 1, 2) IN
('13', '31', '23', '29', '30', '18', '17')
AND c.acctbal > (
SELECT avg(c.acctbal)
FROM
customer as c
WHERE
c.acctbal > 0.00
AND substr(c.phone, 1, 2) IN
('13', '31', '23', '29', '30', '18', '17')
)
AND NOT exists(
SELECT *
FROM
orders as o
WHERE
o.custkey = c.custkey
)
) AS custsale
GROUP BY
cntrycode
ORDER BY
cntrycode;
原来:
SELECT
cntrycode,
count(*) AS numcust,
sum(c_acctbal) AS totacctbal
FROM (
SELECT
substr(c_phone, 1, 2) AS cntrycode,
c_acctbal
FROM
customer
WHERE
substr(c_phone, 1, 2) IN
('13', '31', '23', '29', '30', '18', '17')
AND c_acctbal > (
SELECT avg(c_acctbal)
FROM
customer
WHERE
c_acctbal > 0.00
AND substr(c_phone, 1, 2) IN
('13', '31', '23', '29', '30', '18', '17')
)
AND NOT exists(
SELECT *
FROM
orders
WHERE
o_custkey = c_custkey
)
) AS custsale
GROUP BY
cntrycode
ORDER BY
cntrycode