1. 表的加减法
新建表 tbl_product2 ,供后面学习。
create table tbl_product2
(product_id char(4) primary key,
product_name varchar(100) not null,
product_typevarchar(32) not null,
sale_price int,
purchase_price int,
register_date date);
desc tbl_product;
+----------------+--------------+------+-----+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+----------------+--------------+------+-----+---------+-------+
| product_id | char(4) | NO | PRI | NULL | |
| product_name | varchar(100) | NO | | NULL | |
| product_type | varchar(32) | NO | | NULL | |
| sale_price | int | YES | | NULL | |
| purchase_price | int | YES | | NULL | |
| register_date | date | YES | | NULL | |
+----------------+--------------+------+-----+---------+-------+
insert into tbl_product2 values('0001', 'T-shirt', 'clothes', 1000, 500, '2009-09-20');
insert into tbl_product2 values ('0002', 'puncher', 'Office supplies', 500, 320, '2009-09-11');
insert into tbl_product2 values ('0003', 'sports T-shirt', 'clothes', 4000, 2800, NULL);
insert into tbl_product2 values ('0009', 'gloves', 'clothes', 800, 500, NULL);
insert into tbl_product2 values ('0010', 'kettle', 'kitchenware', 2000, 1700, '2009-09-20');
select * from tbl_product2;
+------------+----------------+-----------------+------------+----------------+---------------+
| product_id | product_name | product_type | sale_price | purchase_price | register_date |
+------------+----------------+-----------------+------------+----------------+---------------+
| 0001 | T-shirt | clothes | 1000 | 500 | 2009-09-20 |
| 0002 | puncher | Office supplies | 500 | 320 | 2009-09-11 |
| 0003 | sports T-shirt | clothes | 4000 | 2800 | NULL |
| 0009 | gloves | clothes | 800 | 500 | NULL |
| 0010 | kettle | kitchenware | 2000 | 1700 | 2009-09-20 |
+------------+----------------+-----------------+------------+----------------+---------------+
1.1 什么是集合运算
在标准 SQL 中, 分别对检索结果使用 UNION
, INTERSECT,
EXCEPT
来将检索结果进行并、交和差运算。
1.2 表的加法 UNION
1.2.1 UNION
SELECT product_id, product_name
FROM tbl_product
UNION
SELECT product_id, product_name
FROM tbl_product2;
+------------+-----------------+
| product_id | product_name |
+------------+-----------------+
| 0001 | T-shirt |
| 0002 | puncher |
| 0003 | sports T-shirt |
| 0004 | kitchen knife |
| 0005 | pressure cooker |
| 0006 | fork |
| 0007 | grater |
| 0008 | ballpoint |
| 0009 | gloves |
| 0010 | kettle |
+------------+-----------------+

练习题: 假设连锁店想要增加毛利率超过 50%或者售价低于 800 的货物的存货量, 请使用 UNION 对分别满足上述两个条件的商品的查询结果求并集。
SELECT *
FROM (
SELECT *
FROM tbl_product
UNION
SELECT *
FROM tbl_product2
) AS p
WHERE p.sale_price < 800 OR p.sale_price > 1.5 * p.purchase_price;
+------------+--------------+-----------------+------------+----------------+---------------+
| product_id | product_name | product_type | sale_price | purchase_price | register_date |
+------------+--------------+-----------------+------------+----------------+---------------+
| 0001 | T-shirt | clothes | 1000 | 500 | 2009-09-20 |
| 0002 | puncher | Office supplies | 500 | 320 | 2009-09-11 |
| 0006 | fork | kitchenware | 500 | NULL | 2009-09-20 |
| 0008 | ballpoint | Office supplies | 100 | NULL | 2009-11-11 |
| 0009 | gloves | clothes | 800 | 500 | NULL |
+------------+--------------+-----------------+------------+----------------+---------------+
1.2.2 UNION 与 OR 谓词
若要将两个不同的表中的结果合并在一起,就不得不使用 UNION 了。而且, 即便是对于同一张表, 有时也会出于查询效率方面的因素来使用 UNION。
练习题: 分别使用 UNION 或者 OR 谓词,找出毛利率不足 30%或毛利率未知的商品。
# 使用 UNION
SELECT product_id, product_name, sale_price, purchase_price, sale_price - purchase_price AS profit
FROM tbl_product
WHERE sale_price < 1.3 * purchase_price
UNION
SELECT product_id, product_name, sale_price, purchase_price, sale_price - purchase_price AS profit
FROM tbl_product
WHERE sale_price IS NULL;
+------------+---------------+------------+----------------+--------+
| product_id | product_name | sale_price | purchase_price | profit |
+------------+---------------+------------+----------------+--------+
| 0004 | kitchen knife | 3000 | 2800 | 200 |
| 0007 | grater | 880 | 790 | 90 |
+------------+---------------+------------+----------------+--------+
# 使用 OR
SELECT product_id, product_name, sale_price, purchase_price, sale_price - purchase_price AS profit
FROM tbl_product
WHERE sale_price < 1.3 * purchase_price OR sale_price IS NULL;
+------------+---------------+------------+----------------+--------+
| product_id | product_name | sale_price | purchase_price | profit |
+------------+---------------+------------+----------------+--------+
| 0004 | kitchen knife | 3000 | 2800 | 200 |
| 0007 | grater | 880 | 790 | 90 |
+------------+---------------+------------+----------------+--------+
1.2.3 包含重复行的集合运算 UNION ALL
SQL 语句的 UNION 会对两个查询的结果集进行合并和去重,这种去重不仅会去掉两个结果集相互重复的, 还会去掉一个结果集中的重复行。但在实践中有时候需要需要不去重的并集,在 UNION 的结果中保留重复行的语法其实非常简单,只需要在 UNION 后面添加 ALL 关键字就可以了。
例如, 想要知道 tbl_product 和 tbl_product2 中所包含的商品种类及每种商品的数量。
第一步,就需要将两个表的商品种类字段选出来,然后使用 UNION ALL 进行不去重地合并。
第一步,接下来再对两个表的结果按 product_type 字段分组计数。
SELECT product_id, product_name
FROM tbl_product
UNION ALL
SELECT product_id, product_name
FROM tbl_product2;
+------------+-----------------+
| product_id | product_name |
+------------+-----------------+
| 0001 | T-shirt |
| 0002 | puncher |
| 0003 | sports T-shirt |
| 0004 | kitchen knife |
| 0005 | pressure cooker |
| 0006 | fork |
| 0007 | grater |
| 0008 | ballpoint |
| 0001 | T-shirt |
| 0002 | puncher |
| 0003 | sports T-shirt |
| 0009 | gloves |
| 0010 | kettle |
+------------+-----------------+
练习题: 商店决定对 tbl_product 表中 利润低于50% 或者 售价低于1000的商品提价,请使用 UNION ALL 语句将分别满足上述两个条件的结果取并集。 查询结果类似下表:

# 利润率 = (销售价 - 成本价) / 成本价,(销售价 - 成本价) / 成本价 < 0.5 等价于 销售价 < 1.5*成本价
# 第一步:查询 利润低于50% 的商品,purchase_price 为 NULL 按 50 处理。
SELECT *
FROM tbl_product
WHERE sale_price < 1.5 * IFNULL(purchase_price, 50);
+------------+-----------------+--------------+------------+----------------+---------------+
| product_id | product_name | product_type | sale_price | purchase_price | register_date |
+------------+-----------------+--------------+------------+----------------+---------------+
| 0003 | sports T-shirt | clothes | 4000 | 2800 | NULL |
| 0004 | kitchen knife | kitchenware | 3000 | 2800 | 2009-09-20 |
| 0005 | pressure cooker | kitchenware | 6800 | 5000 | 2009-01-15 |
| 0007 | grater | kitchenware | 880 | 790 | 2008-04-28 |
+------------+-----------------+--------------+------------+----------------+---------------+
# 第二步:查询 售价低于1000的商品
SELECT *
FROM tbl_product
WHERE sale_price < 1000;
+------------+--------------+-----------------+------------+----------------+---------------+
| product_id | product_name | product_type | sale_price | purchase_price | register_date |
+------------+--------------+-----------------+------------+----------------+---------------+
| 0002 | puncher | Office supplies | 500 | 320 | 2009-09-11 |
| 0006 | fork | kitchenware | 500 | NULL | 2009-09-20 |
| 0007 | grater | kitchenware | 880 | 790 | 2008-04-28 |
| 0008 | ballpoint | Office supplies | 100 | NULL | 2009-11-11 |
+------------+--------------+-----------------+------------+----------------+---------------+
# 第三步:使用 UNION ALL 语句将 第一步 和 第二步 的结果取并集
SELECT * FROM tbl_product
WHERE sale_price < 1.5 * IFNULL(purchase_price, 50)
UNION ALL
SELECT * FROM tbl_product
WHERE sale_price < 1000;
+------------+-----------------+-----------------+------------+----------------+---------------+
| product_id | product_name | product_type | sale_price | purchase_price | register_date |
+------------+-----------------+-----------------+------------+----------------+---------------+
| 0003 | sports T-shirt | clothes | 4000 | 2800 | NULL |
| 0004 | kitchen knife | kitchenware | 3000 | 2800 | 2009-09-20 |
| 0005 | pressure cooker | kitchenware | 6800 | 5000 | 2009-01-15 |
| 0007 | grater | kitchenware | 880 | 790 | 2008-04-28 |
| 0002 | puncher | Office supplies | 500 | 320 | 2009-09-11 |
| 0006 | fork | kitchenware | 500 | NULL | 2009-09-20 |
| 0007 | grater | kitchenware | 880 | 790 | 2008-04-28 |
| 0008 | ballpoint | Office supplies | 100 | NULL | 2009-11-11 |
+------------+-----------------+-----------------+------------+----------------+---------------+
1.2.4 隐式数据类型转换
通常来说, 会把类型完全一致,并且代表相同属性的列使用 UNION 合并到一起显示。但有时候,即使数据类型不完全相同,也会通过隐式类型转换来将两个类型不同的列放在一列里显示,例如字符串和数值类型:
SELECT product_id, product_name, '1'
FROM tbl_product
UNION
SELECT product_id, product_name, sale_price
FROM tbl_product2
+------------+-----------------+------+
| product_id | product_name | 1 |
+------------+-----------------+------+
| 0001 | T-shirt | 1 |
| 0002 | puncher | 1 |
| 0003 | sports T-shirt | 1 |
| 0004 | kitchen knife | 1 |
| 0005 | pressure cooker | 1 |
| 0006 | fork | 1 |
| 0007 | grater | 1 |
| 0008 | ballpoint | 1 |
| 0001 | T-shirt | 1000 |
| 0002 | puncher | 500 |
| 0003 | sports T-shirt | 4000 |
| 0009 | gloves | 800 |
| 0010 | kettle | 2000 |
+------------+-----------------+------+
注意: hive中进行join关联时,关联列要避免使用隐式数据类型转换,否则容易导致数据倾斜
练习题: 使用 SYSDATE() 函数可以返回当前日期时间, 是一个日期时间类型的数据, 试测试该数据类型和数值、字符串等类型的兼容性。
例如, 以下代码可以正确执行, 说明时间日期类型和字符串,数值以及缺失值均能兼容。
SELECT SYSDATE(), SYSDATE(), SYSDATE()
UNION
SELECT 'chars', 123, NULL;
+---------------------+---------------------+---------------------+
| SYSDATE() | SYSDATE() | SYSDATE() |
+---------------------+---------------------+---------------------+
| 2021-08-16 15:47:12 | 2021-08-16 15:47:12 | 2021-08-16 15:47:12 |
| chars | 123 | NULL |
+---------------------+---------------------+---------------------+
SELECT 'chars', 123, NULL
UNION
SELECT SYSDATE(), SYSDATE(), SYSDATE();
+---------------------+---------------------+---------------------+
| chars | 123 | NULL |
+---------------------+---------------------+---------------------+
| chars | 123 | NULL |
| 2021-08-16 15:48:12 | 2021-08-16 15:48:12 | 2021-08-16 15:48:12 |
+---------------------+---------------------+---------------------+
1.3 交运算 INTERSECT
1.3.1 MySQL 8.0 不支持交运算 INTERSECT。可以用 INNER JOIN 来求得交集
SELECT product_id, product_name
FROM tbl_product
INTERSECT
SELECT product_id, product_name
FROM tbl_product2;
# ERROR 1064 (42000): You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'SELECT product_id, product_name FROM tbl_product2' at line 1
需要用 INNER JOIN 来求得交集
SELECT p1.product_id, p1.product_name
FROM tbl_product p1
INNER JOIN tbl_product2 p2
ON p1.product_id = p2.product_id;
+------------+----------------+
| product_id | product_name |
+------------+----------------+
| 0001 | T-shirt |
| 0002 | puncher |
| 0003 | sports T-shirt |
+------------+----------------+
1.4 差集 补集与表的减法
1.4.1 MySQL 8.0 还不支持 EXCEPT 运算
可以借助 NOT IN 或 NOT EXISTS,实现表的减法。
练习题: 找出只存在于 tbl_product 表但不存在于 tbl_product2 表的商品。
SELECT product_id, product_name
FROM tbl_product
WHERE product_id NOT IN(SELECT product_id FROM tbl_product2);
+------------+-----------------+
| product_id | product_name |
+------------+-----------------+
| 0004 | kitchen knife |
| 0005 | pressure cooker |
| 0006 | fork |
| 0007 | grater |
| 0008 | ballpoint |
+------------+-----------------+
SELECT product_id, product_name
FROM tbl_product AS p1
WHERE NOT EXISTS(
SELECT product_id
FROM tbl_product2 p2
WHERE p1.product_id = p2.product_id
);
+------------+-----------------+
| product_id | product_name |
+------------+-----------------+
| 0004 | kitchen knife |
| 0005 | pressure cooker |
| 0006 | fork |
| 0007 | grater |
| 0008 | ballpoint |
+------------+-----------------+
1.4.2 EXCEPT 与 NOT 谓词
练习题: 使用 NOT 谓词进行集合的减法运算, 求出 tbl_product 表中, 售价高于2000,但利润低于30%的商品。
SELECT *
FROM tbl_product
WHERE sale_price > 2000 AND sale_price > 1.3 * purchase_price;
+------------+---------------+--------------+------------+----------------+---------------+
| product_id | product_name | product_type | sale_price | purchase_price | register_date |
+------------+---------------+--------------+------------+----------------+---------------+
| 0004 | kitchen knife | kitchenware | 3000 | 2800 | 2009-09-20 |
+------------+---------------+--------------+------------+----------------+---------------+
# 或者
SELECT *
FROM tbl_product
WHERE sale_price > 2000 AND product_id NOT IN(
SELECT product_id
FROM tbl_product
WHERE sale_price > 1.3 * purchase_price
);
+------------+---------------+--------------+------------+----------------+---------------+
| product_id | product_name | product_type | sale_price | purchase_price | register_date |
+------------+---------------+--------------+------------+----------------+---------------+
| 0004 | kitchen knife | kitchenware | 3000 | 2800 | 2009-09-20 |
+------------+---------------+--------------+------------+----------------+---------------+
1.4.3 交运算 INTERSECT 与 AND 谓词
对于同一个表的两个查询结果而言, 他们的交运算 INTERSECT 可以等价与 将两个查询的检索条件用 AND 谓词连接来实现。
练习题: 使用 AND 谓词查找 tbl_product 表中利润率高于50%,并且售价低于1500的商品。
SELECT *
FROM tbl_product
WHERE sale_price < 1500 AND sale_price > 1.5 * purchase_price;
+------------+--------------+-----------------+------------+----------------+---------------+
| product_id | product_name | product_type | sale_price | purchase_price | register_date |
+------------+--------------+-----------------+------------+----------------+---------------+
| 0001 | T-shirt | clothes | 1000 | 500 | 2009-09-20 |
| 0002 | puncher | Office supplies | 500 | 320 | 2009-09-11 |
+------------+--------------+-----------------+------------+----------------+---------------+
# 或
SELECT *
FROM tbl_product
WHERE sale_price < 1500 AND product_id IN(
SELECT product_id
FROM tbl_product
WHERE sale_price > 1.5 * purchase_price
);
+------------+--------------+-----------------+------------+----------------+---------------+
| product_id | product_name | product_type | sale_price | purchase_price | register_date |
+------------+--------------+-----------------+------------+----------------+---------------+
| 0001 | T-shirt | clothes | 1000 | 500 | 2009-09-20 |
| 0002 | puncher | Office supplies | 500 | 320 | 2009-09-11 |
+------------+--------------+-----------------+------------+----------------+---------------+
1.4.4 对称差
两个集合A、B的对称差:是指那些仅属于A或仅属于B的元素构成的集合。对称差也是个非常基础的运算。例如,两个集合的交就可以看作是两个集合的并去掉两个集合的对称差。
上述方法在其他数据库里也可以用来简单地实现表或查询结果的对称差运算:首先使用 UNION 求两个表的并集,然后使用 INTERSECT 求两个表的交集, 然后用并集减去交集, 就得到了对称差。
但由于在MySQL 8.0 里,由于两个表或查询结果的并不能直接求出来,因此并不适合使用上述思路来求对称差。好在还有差集运算可以使用。从直观上就能看出来,两个集合的对称差等于 ( A − B ) ∪ ( B − A ) (A - B) \cup (B-A) (A−B)∪(B−A),因此实践中可以用这个思路来求对称差。

练习题: 使用 tbl_product 表和 tbl_product2 表的对称差来查询哪些商品只在其中一张表
第一步:计算对称差
# 1. 利用 not in 计算 A - B
SELECT *
FROM tbl_product
WHERE product_id NOT IN (
SELECT product_id
FROM tbl_product2
);
+------------+-----------------+-----------------+------------+----------------+---------------+
| product_id | product_name | product_type | sale_price | purchase_price | register_date |
+------------+-----------------+-----------------+------------+----------------+---------------+
| 0004 | kitchen knife | kitchenware | 3000 | 2800 | 2009-09-20 |
| 0005 | pressure cooker | kitchenware | 6800 | 5000 | 2009-01-15 |
| 0006 | fork | kitchenware | 500 | NULL | 2009-09-20 |
| 0007 | grater | kitchenware | 880 | 790 | 2008-04-28 |
| 0008 | ballpoint | Office supplies | 100 | NULL | 2009-11-11 |
+------------+-----------------+-----------------+------------+----------------+---------------+
# 2. 同理 计算 B - A
SELECT *
FROM tbl_product2
WHERE product_id NOT IN (
SELECT product_id
FROM tbl_product
);
+------------+--------------+--------------+------------+----------------+---------------+
| product_id | product_name | product_type | sale_price | purchase_price | register_date |
+------------+--------------+--------------+------------+----------------+---------------+
| 0009 | gloves | clothes | 800 | 500 | NULL |
| 0010 | kettle | kitchenware | 2000 | 1700 | 2009-09-20 |
+------------+--------------+--------------+------------+----------------+---------------+
第二步:利用 UNION 计算 (A - B)∪(B - A)
SELECT *
FROM tbl_product
WHERE product_id NOT IN (
SELECT product_id
FROM tbl_product2
)
UNION
SELECT *
FROM tbl_product2
WHERE product_id NOT IN (
SELECT product_id
FROM tbl_product
);
+------------+-----------------+-----------------+------------+----------------+---------------+
| product_id | product_name | product_type | sale_price | purchase_price | register_date |
+------------+-----------------+-----------------+------------+----------------+---------------+
| 0004 | kitchen knife | kitchenware | 3000 | 2800 | 2009-09-20 |
| 0005 | pressure cooker | kitchenware | 6800 | 5000 | 2009-01-15 |
| 0006 | fork | kitchenware | 500 | NULL | 2009-09-20 |
| 0007 | grater | kitchenware | 880 | 790 | 2008-04-28 |
| 0008 | ballpoint | Office supplies | 100 | NULL | 2009-11-11 |
| 0009 | gloves | clothes | 800 | 500 | NULL |
| 0010 | kettle | kitchenware | 2000 | 1700 | 2009-09-20 |
+------------+-----------------+-----------------+------------+----------------+---------------+
1.4.5 借助并集和差集迂回实现交集运算 INTERSECT
通过文氏图,两个集合的交集可以看作是两个集合的并去掉两个集合的对称差,即 A ∩ B = A ∪ B − ( A − B ) ∪ ( B − A ) A \cap B = A \cup B - (A - B) \cup (B - A) A∩B=A∪B−(A−B)∪(B−A)。
2. 联结 (JOIN)
上面的集合运算都是以行方向为单位进行操作。通俗地说,就是进行这些集合运算时,会导致记录行数的增减。使用 UNION 会增加记录行数,而使用 INTERSECT 或者 EXCEPT 会减少记录行数。
但这些运算不能改变列的变化,虽然使用函数或者 CASE 表达式等列运算,可以增加列的数量,但仍然只能从一张表中提供的基础信息列中获得一些"引申列",本质上并不能提供更多的信息。如果想要从多个表获取信息,例如,如果我们想要找出某个商店里的衣服类商品的名称,数量及价格等信息,则必须分别从 tbl_shop_product 表和 tbl_product 表获取信息。

连结 (JOIN) 就是使用某种关联条件(一般是使用相等判断谓词"="),将其他表中的列添加过来,进行 “添加列” 的集合运算。
连结分为:
- 内连接
- 等值连接
- 非等值连接
- 自连接
- 外连接
- 左外连接
- 右外连接
- 全外连接
- 交叉连接
2.1 内连结 (INNER JOIN)
-- 内连结语法
FROM <tbl_name_1> INNER JOIN <tbl_name_2> ON <condition(s)>
# 创建商店表 tbl_shop_product
CREATE TABLE IF NOT EXISTS tbl_shop_product
(
shop_id CHAR(4),
shop_name VARCHAR(100) NOT NULL,
product_id CHAR(4) NOT NULL,
quantity INT,
PRIMARY KEY (shop_id, product_id)
);
INSERT INTO tbl_shop_product VALUES('000A', 'Tokyo', '0001', 30);
INSERT INTO tbl_shop_product VALUES('000A', 'Tokyo', '0002', 50);
INSERT INTO tbl_shop_product VALUES('000A', 'Tokyo', '0003', 15);
INSERT INTO tbl_shop_product VALUES('000B', 'Nagoya', '0002', 30);
INSERT INTO tbl_shop_product VALUES('000B', 'Nagoya', '0003', 120);
INSERT INTO tbl_shop_product VALUES('000B', 'Nagoya', '0004', 20);
INSERT INTO tbl_shop_product VALUES('000B', 'Nagoya', '0006', 10);
INSERT INTO tbl_shop_product VALUES('000B', 'Nagoya', '0007', 40);
INSERT INTO tbl_shop_product VALUES('000C', 'Osaka', '0003', 20);
INSERT INTO tbl_shop_product VALUES('000C', 'Osaka', '0004', 50);
2.1.1 使用内连结从两个表获取信息
找出某个商店里衣服类的商品信息。进一步把这个问题明确化:找出东京商店里衣服类的商品信息。
select * from tbl_shop_product;
+---------+-----------+------------+----------+
| shop_id | shop_name | product_id | quantity |
+---------+-----------+------------+----------+
| 000A | Tokyo | 0001 | 30 |
| 000A | Tokyo | 0002 | 50 |
| 000A | Tokyo | 0003 | 15 |
| 000B | Nagoya | 0002 | 30 |
| 000B | Nagoya | 0003 | 120 |
| 000B | Nagoya | 0004 | 20 |
| 000B | Nagoya | 0006 | 10 |
| 000B | Nagoya | 0007 | 40 |
| 000C | Osaka | 0003 | 20 |
| 000C | Osaka | 0004 | 50 |
+---------+-----------+------------+----------+
select * from tbl_product;
+------------+-----------------+-----------------+------------+----------------+---------------+
| product_id | product_name | product_type | sale_price | purchase_price | register_date |
+------------+-----------------+-----------------+------------+----------------+---------------+
| 0001 | T-shirt | clothes | 1000 | 500 | 2009-09-20 |
| 0002 | puncher | Office supplies | 500 | 320 | 2009-09-11 |
| 0003 | sports T-shirt | clothes | 4000 | 2800 | NULL |
| 0004 | kitchen knife | kitchenware | 3000 | 2800 | 2009-09-20 |
| 0005 | pressure cooker | kitchenware | 6800 | 5000 | 2009-01-15 |
| 0006 | fork | kitchenware | 500 | NULL | 2009-09-20 |
| 0007 | grater | kitchenware | 880 | 790 | 2008-04-28 |
| 0008 | ballpoint | Office supplies | 100 | NULL | 2009-11-11 |
+------------+-----------------+-----------------+------------+----------------+---------------+
关键点:找出一个类似于 “轴” 或者 “桥梁” 的公共列,将两张表用这个列连结起来。这就是连结运算所要作的事情。
对比一下上述两张表,发现 product_id 列是一个公共列,因此很自然的事情就是用这个 product_id 列来作为连接的“桥梁”,将 tbl_product 和 tbl_shop_product 这两张表连接起来。
第一步:找出每个商店的商店编号、商店名称、商品编号、商品名称、商品类别信息。
第二步:在 FROM 子句中使用 INNER JOIN 将两张表连接起来,并为 ON 子句指定连结条件为tbl_shop_product.product_id=tbl_product.product_id
。
select
sp.shop_id AS shop_id,
sp.shop_name AS shop_name,
p.product_id AS product_id,
p.product_name AS product_name,
p.product_type AS product_type
from tbl_product AS p
inner join tbl_shop_product AS sp
on p.product_id = sp.product_id;
+---------+-----------+------------+----------------+-----------------+
| shop_id | shop_name | product_id | product_name | product_type |
+---------+-----------+------------+----------------+-----------------+
| 000A | Tokyo | 0001 | T-shirt | clothes |
| 000A | Tokyo | 0002 | puncher | Office supplies |
| 000A | Tokyo | 0003 | sports T-shirt | clothes |
| 000B | Nagoya | 0002 | puncher | Office supplies |
| 000B | Nagoya | 0003 | sports T-shirt | clothes |
| 000B | Nagoya | 0004 | kitchen knife | kitchenware |
| 000B | Nagoya | 0006 | fork | kitchenware |
| 000B | Nagoya | 0007 | grater | kitchenware |
| 000C | Osaka | 0003 | sports T-shirt | clothes |
| 000C | Osaka | 0004 | kitchen knife | kitchenware |
+---------+-----------+------------+----------------+-----------------+
关于内连结,需要注意以下三点:
-
进行连结时需要在 FROM 子句中使用多张表.
-
必须使用 ON 子句来指定连结条件.
-
SELECT 子句中的列最好按照 表名.列名 的格式来使用。
2.1.2 结合 WHERE 子句使用内连结
如果需要在使用内连结的时候同时使用 WHERE 子句对检索结果进行筛选, 则需要把 WHERE 子句写在 ON 子句的后边。
增加 WHERE 子句的方式有好几种,先从最简单的说起。
第一种:增加 WEHRE 子句的方式,就是把上述查询作为子查询, 用括号封装起来, 然后在外层查询增加筛选条件。
select *
from (
select
sp.shop_id AS shop_id,
sp.shop_name AS shop_name,
p.product_id AS product_id,
p.product_name AS product_name,
p.product_type AS product_type
from tbl_product AS p
inner join tbl_shop_product AS sp
on p.product_id = sp.product_id
) as p_sp
where p_sp.shop_name = 'Nagoya' and p_sp.product_type = 'clothes';
+---------+-----------+------------+----------------+--------------+
| shop_id | shop_name | product_id | product_name | product_type |
+---------+-----------+------------+----------------+--------------+
| 000B | Nagoya | 0003 | sports T-shirt | clothes |
+---------+-----------+------------+----------------+--------------+
第二种:WHERE 子句将在 FROM 子句之后执行, 也就是说, 在做完 INNER JOIN … ON 得到一个新表后, 才会执行 WHERE 子句。就可以将筛选条件加在 WHERE子句。
select
sp.shop_id AS shop_id,
sp.shop_name AS shop_name,
p.product_id AS product_id,
p.product_name AS product_name,
p.product_type AS product_type
from tbl_product AS p
inner join tbl_shop_product AS sp
on p.product_id = sp.product_id
where sp.shop_name = 'Nagoya'
and p.product_type = 'clothes';
+---------+-----------+------------+----------------+--------------+
| shop_id | shop_name | product_id | product_name | product_type |
+---------+-----------+------------+----------------+--------------+
| 000B | Nagoya | 0003 | sports T-shirt | clothes |
+---------+-----------+------------+----------------+--------------+
执行顺序: 1 FROM -> 2 WHERE -> 3 GROUP BY -> 4 HAVING -> 5 SELECT -> 6 ORDER BY
也就是说, 两张表是先连结, 得到了一张新表, 然后 WHERE 子句对这张新表的行按照两个条件进行了筛选, 最后, SELECT 子句选出了那些我们需要的列。
第三种:将 WHERE 子句中的条件直接添加在 ON 子句中, 这时候 ON 子句后最好用括号将连结条件和筛选条件括起来。一种不是很常见的做法。
select
sp.shop_id AS shop_id,
sp.shop_name AS shop_name,
p.product_id AS product_id,
p.product_name AS product_name,
p.product_type AS product_type
from tbl_product AS p
inner join tbl_shop_product AS sp
on p.product_id = sp.product_id
and sp.shop_name = 'Nagoya'
and p.product_type = 'clothes';
+---------+-----------+------------+----------------+--------------+
| shop_id | shop_name | product_id | product_name | product_type |
+---------+-----------+------------+----------------+--------------+
| 000B | Nagoya | 0003 | sports T-shirt | clothes |
+---------+-----------+------------+----------------+--------------+
缺点
-
不是太容易阅读。
-
先连结再筛选的标准写法的执行顺序是,两张完整的表做了连结之后再做筛选。如果要连结多张表,或者需要做的筛选比较复杂时,性能低。
而在结合 WHERE 子句使用内连结的时候, 我们也可以更改任务顺序, 并采用任务分解的方法,先分别在两个表使用 WHERE 进行筛选,然后把上述两个子查询连结起来。
# 先分别在两张表里做筛选,把复杂的筛选条件按表分拆,然后把筛选结果(作为表)连接起来
select
sp.shop_id AS shop_id,
sp.shop_name AS shop_name,
p.product_id AS product_id,
p.product_name AS product_name,
p.product_type AS product_type
from (
select *
from tbl_product
where product_type = 'clothes'
) AS p
inner join (
select *
from tbl_shop_product
where shop_name = 'Nagoya'
) AS sp
on p.product_id = sp.product_id;
+---------+-----------+------------+----------------+--------------+
| shop_id | shop_name | product_id | product_name | product_type |
+---------+-----------+------------+----------------+--------------+
| 000B | Nagoya | 0003 | sports T-shirt | clothes |
+---------+-----------+------------+----------------+--------------+
这种看似复杂的写法,避免了写复杂的筛选条件,实际上整体的逻辑反而非常清晰,在写查询的过程中,首先要按照最便于自己理解的方式来写,先把问题解决了,再思考优化的问题。
练习题: 找出每个商店里的衣服类商品的名称及价格信息。
# 不使用子查询
select sp.shop_id, sp.shop_name, p.product_id, p.product_name, p.product_type, p.purchase_price
from tbl_shop_product as sp
inner join tbl_product as p
on sp.product_id = p.product_id
where p.product_type = 'clothes';
+---------+-----------+------------+----------------+--------------+----------------+
| shop_id | shop_name | product_id | product_name | product_type | purchase_price |
+---------+-----------+------------+----------------+--------------+----------------+
| 000A | Tokyo | 0001 | T-shirt | clothes | 500 |
| 000A | Tokyo | 0003 | sports T-shirt | clothes | 2800 |
| 000B | Nagoya | 0003 | sports T-shirt | clothes | 2800 |
| 000C | Osaka | 0003 | sports T-shirt | clothes | 2800 |
+---------+-----------+------------+----------------+--------------+----------------+
# 使用子查询
select sp.shop_id, sp.shop_name, p.product_id, p.product_name, p.product_type, p.purchase_price
from (
select * from tbl_shop_product
) as sp
inner join (
select *
from tbl_product
where product_type = 'clothes'
) as p
on sp.product_id = p.product_id;
+---------+-----------+------------+----------------+--------------+----------------+
| shop_id | shop_name | product_id | product_name | product_type | purchase_price |
+---------+-----------+------------+----------------+--------------+----------------+
| 000A | Tokyo | 0001 | T-shirt | clothes | 500 |
| 000A | Tokyo | 0003 | sports T-shirt | clothes | 2800 |
| 000B | Nagoya | 0003 | sports T-shirt | clothes | 2800 |
| 000C | Osaka | 0003 | sports T-shirt | clothes | 2800 |
+---------+-----------+------------+----------------+--------------+----------------+
练习题: 分别使用连结两个子查询和不使用子查询的方式, 找出东京商店里, 售价低于 2000 的商品信息。
# 不使用子查询
select shop_id, shop_name, p.product_id, product_name, product_type, sale_price
from tbl_product as p
inner join tbl_shop_product as sp
on p.product_id = sp.product_id
where p.sale_price < 2000
and sp.shop_name = 'Tokyo';
+---------+-----------+------------+--------------+-----------------+------------+
| shop_id | shop_name | product_id | product_name | product_type | sale_price |
+---------+-----------+------------+--------------+-----------------+------------+
| 000A | Tokyo | 0001 | T-shirt | clothes | 1000 |
| 000A | Tokyo | 0002 | puncher | Office supplies | 500 |
+---------+-----------+------------+--------------+-----------------+------------+
# 使用子查询
select shop_id, shop_name, p.product_id, product_name, product_type, sale_price
from (
select *
from tbl_product
where sale_price < 2000
) as p
inner join (
select *
from tbl_shop_product
where shop_name = 'Tokyo'
) as sp
on p.product_id = sp.product_id;
+---------+-----------+------------+--------------+-----------------+------------+
| shop_id | shop_name | product_id | product_name | product_type | sale_price |
+---------+-----------+------------+--------------+-----------------+------------+
| 000A | Tokyo | 0001 | T-shirt | clothes | 1000 |
| 000A | Tokyo | 0002 | puncher | Office supplies | 500 |
+---------+-----------+------------+--------------+-----------------+------------+
2.1.3 结合 GROUP BY 子句使用内连结
需要根据 分组列 位于哪个表,来进行区别对待。
最简单的情形:在内连结之前就使用 GROUP BY 子句。但是如果 分组列 和 被聚合的列 不在同一张表,且二者都未被用于连结两张表(都不是连结时的公共列),就只能先连结,再聚合。
练习题: 查询 每个商店中,商品的最高售价。
select
sp.shop_id,
sp.shop_name,
max(p.sale_price) as max_price
from tbl_shop_product as sp
inner join tbl_product as p
on p.product_id = sp.product_id
group by sp.shop_id, sp.shop_name;
+---------+-----------+-----------+
| shop_id | shop_name | max_price |
+---------+-----------+-----------+
| 000A | Tokyo | 4000 |
| 000B | Nagoya | 4000 |
| 000C | Osaka | 4000 |
+---------+-----------+-----------+
**思考题:**上述查询得到了每个商品售价最高的商品,但并不知道售价最高的商品是哪一个。如何获取每个商店里售价最高的商品的名称和售价?
注: 这道题的一个简易的方式是使用下一章的窗口函数。当然,也可以使用其他我们已经学过的知识来实现,例如,在找出每个商店售价最高商品的价格后,使用这个价格再与 tbl_product 列进行连结,但这种做法在价格不唯一时会出现问题。
2.1.4 自连结(SELF JOIN)
一张表与自己连结,就叫自连结。
注意:自连结并不是区分于内连结和外连结的第三种连结。自连结可以是外连结也可以是内连结,它是不同于内连结外连结的另一个连结的分类方法。
练习题: 查询每个员工的上级领导信息。
SELECT e.EMPNO AS emp_id, e.ENAME AS emp_name, m.EMPNO AS manag_id, m.ENAME AS manag_name
FROM EMP AS e
INNER JOIN EMP AS m
ON e.MGR = m.EMPNO;
+--------+----------+----------+------------+
| emp_id | emp_name | manag_id | manag_name |
+--------+----------+----------+------------+
| 7369 | SMITH | 7902 | FORD |
| 7499 | ALLEN | 7698 | BLAKE |
| 7521 | WARD | 7698 | BLAKE |
| 7566 | JONES | 7839 | KING |
| 7654 | MARTIN | 7698 | BLAKE |
| 7698 | BLAKE | 7839 | KING |
| 7782 | CLARK | 7839 | KING |
| 7788 | SCOTT | 7566 | JONES |
| 7844 | TURNER | 7698 | BLAKE |
| 7876 | ADAMS | 7788 | SCOTT |
| 7900 | JAMES | 7698 | BLAKE |
| 7902 | FORD | 7566 | JONES |
| 7934 | MILLER | 7782 | CLARK |
+--------+----------+----------+------------+
2.1.5 内连结与关联子查询
上次关联子查询中的问题:找出每个商品种类当中售价高于该类商品的平均售价的商品。当时使用关联子查询来实现的。
select p1.product_name, p1.product_type, p1.sale_price
from tbl_product as p1
where sale_price > (
select avg(sale_price)
from tbl_product as p2
where p1.product_type = p2.product_type
group by product_type
);
+-----------------+-----------------+------------+
| product_name | product_type | sale_price |
+-----------------+-----------------+------------+
| puncher | Office supplies | 500 |
| sports T-shirt | clothes | 4000 |
| kitchen knife | kitchenware | 3000 |
| pressure cooker | kitchenware | 6800 |
+-----------------+-----------------+------------+
现在可以使用内连结解决。
第一步:使用 GROUP BY 按商品类别分类计算每类商品的平均价格。
select product_type, avg(sale_price) as avg_sale_price
from tbl_product
group by product_type;
+-----------------+----------------+
| product_type | avg_sale_price |
+-----------------+----------------+
| clothes | 2500.0000 |
| Office supplies | 300.0000 |
| kitchenware | 2795.0000 |
+-----------------+----------------+
第二步:将上述查询与表 tbl_product 按照 product_type (商品种类)进行内连结。
select p1.product_name, p1.product_type, p1.sale_price, p2.avg_sale_price
from tbl_product p1
inner join (select product_type, avg(sale_price) as avg_sale_price
from tbl_product
group by product_type) as p2
on p1.product_type = p2.product_type;
+-----------------+-----------------+------------+----------------+
| product_name | product_type | sale_price | avg_sale_price |
+-----------------+-----------------+------------+----------------+
| T-shirt | clothes | 1000 | 2500.0000 |
| puncher | Office supplies | 500 | 300.0000 |
| sports T-shirt | clothes | 4000 | 2500.0000 |
| kitchen knife | kitchenware | 3000 | 2795.0000 |
| pressure cooker | kitchenware | 6800 | 2795.0000 |
| fork | kitchenware | 500 | 2795.0000 |
| grater | kitchenware | 880 | 2795.0000 |
| ballpoint | Office supplies | 100 | 300.0000 |
+-----------------+-----------------+------------+----------------+
第三步:添加 where 筛选条件,过滤出售价高于该类商品平均价格的商品。
select p1.product_name, p1.product_type, p1.sale_price, p2.avg_sale_price
from tbl_product p1
inner join (select product_type, avg(sale_price) as avg_sale_price
from tbl_product
group by product_type) as p2
on p1.product_type = p2.product_type
where p1.sale_price > p2.avg_sale_price;
+-----------------+-----------------+------------+----------------+
| product_name | product_type | sale_price | avg_sale_price |
+-----------------+-----------------+------------+----------------+
| puncher | Office supplies | 500 | 300.0000 |
| sports T-shirt | clothes | 4000 | 2500.0000 |
| kitchen knife | kitchenware | 3000 | 2795.0000 |
| pressure cooker | kitchenware | 6800 | 2795.0000 |
+-----------------+-----------------+------------+----------------+
使用内连接更容易理解。
2.1.6 自然连结 ( NATURAL JOIN )
自然连结并不是区别于内连结和外连结的第三种连结。其实是内连结的特例:当两个表进行自然连结时,会按照两个表中都包含的列名来进行等值内连结,此时无需使用 ON 来指定连接条件。
自然连结会把两个表的公共列(可以是多个公共列)放在第一列,然后按照两个表的顺序和表中列的顺序,两个表中的其他列都罗列出来。
SELECT * FROM tbl_shop_product
NATURAL JOIN tbl_product;
+------------+---------+-----------+----------+----------------+-----------------+------------+----------------+---------------+
| product_id | shop_id | shop_name | quantity | product_name | product_type | sale_price | purchase_price | register_date |
+------------+---------+-----------+----------+----------------+-----------------+------------+----------------+---------------+
| 0001 | 000A | Tokyo | 30 | T-shirt | clothes | 1000 | 500 | 2009-09-20 |
| 0002 | 000A | Tokyo | 50 | puncher | Office supplies | 500 | 320 | 2009-09-11 |
| 0003 | 000A | Tokyo | 15 | sports T-shirt | clothes | 4000 | 2800 | NULL |
| 0002 | 000B | Nagoya | 30 | puncher | Office supplies | 500 | 320 | 2009-09-11 |
| 0003 | 000B | Nagoya | 120 | sports T-shirt | clothes | 4000 | 2800 | NULL |
| 0004 | 000B | Nagoya | 20 | kitchen knife | kitchenware | 3000 | 2800 | 2009-09-20 |
| 0006 | 000B | Nagoya | 10 | fork | kitchenware | 500 | NULL | 2009-09-20 |
| 0007 | 000B | Nagoya | 40 | grater | kitchenware | 880 | 790 | 2008-04-28 |
| 0003 | 000C | Osaka | 20 | sports T-shirt | clothes | 4000 | 2800 | NULL |
| 0004 | 000C | Osaka | 50 | kitchen knife | kitchenware | 3000 | 2800 | 2009-09-20 |
+------------+---------+-----------+----------+----------------+-----------------+------------+----------------+---------------+
练习题: 试写出与上述自然连结等价的内连结。
SELECT
sp.product_id, sp.shop_id, sp.shop_name, sp.quantity,
p.product_name, p.product_type, p.sale_price, p.purchase_price, p.register_date
FROM tbl_shop_product AS sp
INNER JOIN tbl_product AS p
ON sp.product_id = p.product_id;
使用自然连结还可以求出两张表或两个子查询的公共部分(交集)。
例如教材中 7-1 选取表中公共部分–INTERSECT 一节中的问题: 求表 tbl_product 和表 tbl_product2 中的公共部分, 也可以用自然连结来实现:
# 前面的写法 用 in 实现 A ∩ B,或者A ∩ B = A ∪ B - (A - B) ∪ (B - A)
select * from tbl_product
where product_id in (
select product_id
from tbl_product2
);
+------------+----------------+-----------------+------------+----------------+---------------+
| product_id | product_name | product_type | sale_price | purchase_price | register_date |
+------------+----------------+-----------------+------------+----------------+---------------+
| 0001 | T-shirt | clothes | 1000 | 500 | 2009-09-20 |
| 0002 | puncher | Office supplies | 500 | 320 | 2009-09-11 |
| 0003 | sports T-shirt | clothes | 4000 | 2800 | NULL |
+------------+----------------+-----------------+------------+----------------+---------------+
# 或
select * from tbl_product2
where product_id in (
select product_id
from tbl_product
);
+------------+----------------+-----------------+------------+----------------+---------------+
| product_id | product_name | product_type | sale_price | purchase_price | register_date |
+------------+----------------+-----------------+------------+----------------+---------------+
| 0001 | T-shirt | clothes | 1000 | 500 | 2009-09-20 |
| 0002 | puncher | Office supplies | 500 | 320 | 2009-09-11 |
| 0003 | sports T-shirt | clothes | 4000 | 2800 | NULL |
+------------+----------------+-----------------+------------+----------------+---------------+
# 等价写法 内连接
select p1.product_id, p1.product_name, p1.product_type, p1.sale_price, p1.purchase_price, p1.register_date
from tbl_product as p1
inner join tbl_product2 as p2
on p1.product_id = p2.product_id;
+------------+----------------+-----------------+------------+----------------+---------------+
| product_id | product_name | product_type | sale_price | purchase_price | register_date |
+------------+----------------+-----------------+------------+----------------+---------------+
| 0001 | T-shirt | clothes | 1000 | 500 | 2009-09-20 |
| 0002 | puncher | Office supplies | 500 | 320 | 2009-09-11 |
| 0003 | sports T-shirt | clothes | 4000 | 2800 | NULL |
+------------+----------------+-----------------+------------+----------------+---------------+
# 使用 自然连结实现
SELECT * FROM tbl_product
NATURAL JOIN tbl_product2;
+------------+--------------+-----------------+------------+----------------+---------------+
| product_id | product_name | product_type | sale_price | purchase_price | register_date |
+------------+--------------+-----------------+------------+----------------+---------------+
| 0001 | T-shirt | clothes | 1000 | 500 | 2009-09-20 |
| 0002 | puncher | Office supplies | 500 | 320 | 2009-09-11 |
+------------+--------------+-----------------+------------+----------------+---------------+
这个结果集里面少了 sports T-shirt ,因为 sports T-shirt 的 register_date 字段为空。
在进行自然连结时, 来自于 tbl_product 和 tbl_product2 的 sports T-shirt 这一行数据在进行比较时,实际上是在逐字段进行等值连结。回忆 IS NULL、IS NOT NULL 的缺失值比较方法就可得知,两个缺失值用等号进行比较,结果不为真,而连结只会返回连结条件为真的那些记录行。
如果我们将查询语句进行修改:
# from后面跟的表 没有 register_date 字段就行了
SELECT *
FROM (
SELECT product_id, product_name, product_type, sale_price, purchase_price
FROM tbl_product
) AS p1
NATURAL JOIN (
SELECT product_id, product_name, product_type, sale_price, purchase_price
FROM tbl_product2
) AS p2;
+------------+----------------+-----------------+------------+----------------+
| product_id | product_name | product_type | sale_price | purchase_price |
+------------+----------------+-----------------+------------+----------------+
| 0001 | T-shirt | clothes | 1000 | 500 |
| 0002 | puncher | Office supplies | 500 | 320 |
| 0003 | sports T-shirt | clothes | 4000 | 2800 |
+------------+----------------+-----------------+------------+----------------+
2.1.7 使用内连结求交集
MySQL 8.0 里没有交集运算,前面是通过并集和差集来实现求交集的,现在可以使用连结来实现交集运算了。
练习题: 使用内连结求 tbl_product 表和 tbl_product2 表的交集。
select p1.*
from tbl_product as p1
inner join tbl_product2 as p2
on p1.product_id = p2.product_id;
+------------+----------------+-----------------+------------+----------------+---------------+
| product_id | product_name | product_type | sale_price | purchase_price | register_date |
+------------+----------------+-----------------+------------+----------------+---------------+
| 0001 | T-shirt | clothes | 1000 | 500 | 2009-09-20 |
| 0002 | puncher | Office supplies | 500 | 320 | 2009-09-11 |
| 0003 | sports T-shirt | clothes | 4000 | 2800 | NULL |
+------------+----------------+-----------------+------------+----------------+---------------+
2.2 外连结 (OUTER JOIN)
2.2.1 内连结与外连结区别
-
内连结:会丢弃两张表中不满足 ON 条件的行。
-
外连结:会根据外连结的种类有选择地保留无法匹配到的行。
2.2.2 外连结分类
- 按照保留的行位于哪张表,外连结有三种形式:左连结、右连结和全外连结。
- 左连结:会保存左表中无法按照 ON 子句匹配到的行,此时对应右表的行都是 NULL 。
- 右连结:会保存右表中无法按照 ON 子句匹配到的行,此时对应左表的行都是 NULL 。
- 全外连结:会同时保存两个表中无法按照 ON子句匹配到的行,相应的另一张表中的行用 NULL 填充。
语法:
# 左连结
FROM tbl_name1 LEFT OUTER JOIN tbl_name2 ON <condition(s)>
# 右连结
FROM tbl_name1 RIGHT OUTER JOIN tbl_name2 ON <condition(s)>
# 全外连结
FROM tbl_name1 FULL OUTER JOIN tbl_name2 ON <condition(s)>
2.3 左外连结与右外连结
由于连结时可以交换左表和右表的位置,因此左连结和右连结并没有本质区别。接下来我们先以左连结为例进行学习。
2.3.1 使用左外连结从两个表获取信息
如果你仔细观察过将 tbl_shop_product 和 tbl_product 进行内连结前后的结果的话,你就会发现,tbl_product 表中有两种商品并未在内连结的结果里。就是说,这两种商品并未在任何商店有售(这通常意味着比较重要的业务信息,例如,这两种商品在所有商店都处于缺货状态,需要及时补货。
# 内连结
select p.product_id, p.product_name, sp.shop_id, sp.shop_name
from tbl_product as p
inner join tbl_shop_product as sp
on sp.product_id = p.product_id;
+------------+----------------+---------+-----------+
| product_id | product_name | shop_id | shop_name |
+------------+----------------+---------+-----------+
| 0001 | T-shirt | 000A | Tokyo |
| 0002 | puncher | 000A | Tokyo |
| 0003 | sports T-shirt | 000A | Tokyo |
| 0002 | puncher | 000B | Nagoya |
| 0003 | sports T-shirt | 000B | Nagoya |
| 0004 | kitchen knife | 000B | Nagoya |
| 0006 | fork | 000B | Nagoya |
| 0007 | grater | 000B | Nagoya |
| 0003 | sports T-shirt | 000C | Osaka |
| 0004 | kitchen knife | 000C | Osaka |
+------------+----------------+---------+-----------+
# 左外连结
select p.product_id, p.product_name, sp.shop_id, sp.shop_name
from tbl_product as p
left outer join tbl_shop_product as sp
on sp.product_id = p.product_id;
+------------+-----------------+---------+-----------+
| product_id | product_name | shop_id | shop_name |
+------------+-----------------+---------+-----------+
| 0001 | T-shirt | 000A | Tokyo |
| 0002 | puncher | 000B | Nagoya |
| 0002 | puncher | 000A | Tokyo |
| 0003 | sports T-shirt | 000C | Osaka |
| 0003 | sports T-shirt | 000B | Nagoya |
| 0003 | sports T-shirt | 000A | Tokyo |
| 0004 | kitchen knife | 000C | Osaka |
| 0004 | kitchen knife | 000B | Nagoya |
| 0005 | pressure cooker | NULL | NULL |
| 0006 | fork | 000B | Nagoya |
| 0007 | grater | 000B | Nagoya |
| 0008 | ballpoint | NULL | NULL |
+------------+-----------------+---------+-----------+
对比一下内连结和左外连结的结果,发现相比内连结,外连结结果集多了两行:
| 0005 | pressure cooker | NULL | NULL |
| 0008 | ballpoint | NULL | NULL |
练习题: 统计每种商品分别在哪些商店有售,需要包括那些在每个商店都没货的商品。
select sp.shop_id, sp.shop_name, p.product_id, p.product_name
from tbl_product as p
left outer join tbl_shop_product as sp
on sp.product_id = p.product_id;
+---------+-----------+------------+-----------------+
| shop_id | shop_name | product_id | product_name |
+---------+-----------+------------+-----------------+
| 000A | Tokyo | 0001 | T-shirt |
| 000B | Nagoya | 0002 | puncher |
| 000A | Tokyo | 0002 | puncher |
| 000C | Osaka | 0003 | sports T-shirt |
| 000B | Nagoya | 0003 | sports T-shirt |
| 000A | Tokyo | 0003 | sports T-shirt |
| 000C | Osaka | 0004 | kitchen knife |
| 000B | Nagoya | 0004 | kitchen knife |
| NULL | NULL | 0005 | pressure cooker |
| 000B | Nagoya | 0006 | fork |
| 000B | Nagoya | 0007 | grater |
| NULL | NULL | 0008 | ballpoint |
+---------+-----------+------------+-----------------+
观察结果发现,有两种商品: pressure cooker 和 ballpoint ,在所有商店都没有销售。
外连结要点 1: 选取出单张表中全部的信息
与内连结的结果相比,结果的行数不一样。外连结结果记录行数更多,那增加的记录到底是什么呢?这正是外连结的关键点。查看外连结结果就知道了,多出的 2 条记录是 pressure cooker 和 ballpoint。
-
由于内连结只能选取出同时存在于两张表中的数据,因此只在 tbl_product 表中存在的 2 种商品并没有出现在结果之中。
-
相反,对于外连结来说,只要数据存在于某一张表当中,就能够读取出来。
外连结的使用场景:想要生成固定行数的单据。因为外连结能够得到固定行数的结果。
但是表中不存在的信息还是无法得到,结果中 pressure cooker 和 ballpoint 的商店编号和商店名称都是 NULL 。外连结名称的由来也跟 NULL 有关,即 “结果集中包含原表中不存在(在原表之外)的信息”。相反,只包含表内信息的连结也就被称为内连结了。
外连结要点 2:使用 LEFT、RIGHT 来指定主表
外连结的重点是要把哪张表作为主表。左连结把左表作为主表,右连结把右表作为主表,最终的结果中会包含主表内所有的数据。左连结和右连结没有本质区别,一般用左连结多。
2.3.2 结合 WHERE 子句使用左连结
为什么要结合 WHERE 子句使用左连结?
在结合 WHERE 子句使用外连结时,外连结的结果会包含那些主表中无法匹配到的行,并用缺失值填写另一表中的列,由于这些行的存在,因此在外连结时使用 WHERE 子句。我们来看一个例子:
练习题:使用外连结从 tbl_shop_product 表和 tbl_product 表中找出那些在某个商店库存少于50的商品及对应的商店。
注意: 高压锅和圆珠笔两种商品在所有商店都无货,所以也应该包括在内。
select p.product_name, sp.shop_name, sp.quantity
from tbl_product as p
left outer join tbl_shop_product as sp
on p.product_id = sp.product_id
where sp.quantity < 50;
+----------------+-----------+----------+
| product_name | shop_name | quantity |
+----------------+-----------+----------+
| T-shirt | Tokyo | 30 |
| sports T-shirt | Tokyo | 15 |
| puncher | Nagoya | 30 |
| kitchen knife | Nagoya | 20 |
| fork | Nagoya | 10 |
| grater | Nagoya | 40 |
| sports T-shirt | Osaka | 20 |
+----------------+-----------+----------+
少了高压锅和圆珠笔。
需要增加一个 OR sp.quantity IS NULL
条件。
select p.product_id, p.product_name, sp.shop_id, sp.shop_name, sp.quantity
from tbl_product as p
left outer join tbl_shop_product as sp
on p.product_id = sp.product_id
where sp.quantity < 50
or sp.quantity is null;
+------------+-----------------+---------+-----------+----------+
| product_id | product_name | shop_id | shop_name | quantity |
+------------+-----------------+---------+-----------+----------+
| 0001 | T-shirt | 000A | Tokyo | 30 |
| 0002 | puncher | 000B | Nagoya | 30 |
| 0003 | sports T-shirt | 000C | Osaka | 20 |
| 0003 | sports T-shirt | 000A | Tokyo | 15 |
| 0004 | kitchen knife | 000B | Nagoya | 20 |
| 0005 | pressure cooker | NULL | NULL | NULL |
| 0006 | fork | 000B | Nagoya | 10 |
| 0007 | grater | 000B | Nagoya | 40 |
| 0008 | ballpoint | NULL | NULL | NULL |
+------------+-----------------+---------+-----------+----------+
然而在真实的查询环境中,由于数据量大且数据质量并非设想的那样"干净",我们并不能容易地意识到缺失值等问题数据的存在。因此,还是让我们想一下如何改写我们的查询以使得它能够适应更复杂的真实数据的情形吧。
SQL查询执行顺序:1 FROM -> 2 WHERE -> 3 GROUP BY 4 HAVING -> 5 SELECT -> 6 ORDER BY。问题可能出在 WHERE 筛选条件上,因为在进行完外连结后才会执行WHERE子句,因此那些主表中无法被匹配到的行就被WHERE条件筛选掉了。来试着把 WHERE 子句挪到外连结之前进行:先写个子查询,用来从 tbl_shop_product 表中筛选 quantity<50 的商品,然后再把这个子查询和主表连结起来。
select p.product_id, p.product_name, sp.shop_id, sp.shop_name, sp.quantity
from tbl_product as p
left outer join (
select *
from tbl_shop_product
where quantity < 50
) as sp
on p.product_id = sp.product_id;
+------------+-----------------+---------+-----------+----------+
| product_id | product_name | shop_id | shop_name | quantity |
+------------+-----------------+---------+-----------+----------+
| 0001 | T-shirt | 000A | Tokyo | 30 |
| 0002 | puncher | 000B | Nagoya | 30 |
| 0003 | sports T-shirt | 000C | Osaka | 20 |
| 0003 | sports T-shirt | 000A | Tokyo | 15 |
| 0004 | kitchen knife | 000B | Nagoya | 20 |
| 0005 | pressure cooker | NULL | NULL | NULL |
| 0006 | fork | 000B | Nagoya | 10 |
| 0007 | grater | 000B | Nagoya | 40 |
| 0008 | ballpoint | NULL | NULL | NULL |
+------------+-----------------+---------+-----------+----------+
2.3.3 在 MySQL 中实现全外连结
全外连结本质上就是对左表和右表的所有行都予以保留,能用 ON 关联到的就把左表和右表的内容在一行内显示,不能被关联到的就分别显示,然后把多余的列用缺失值填充。
MySQL 8.0 目前还不支持全外连结,但是通过 UNION 操作左连结和右连结的结果来实现全外连结。
2.3.4 多表连结
通常连结只涉及 2 张表,但有时也会出现必须同时连结 3 张以上的表的情况, 原则上连结表的数量并没有限制。
多表进行内连结
首先创建一个用于三表连结的表 tbl_inventory_product。首先我们创建一张用来管理库存商品的表, 假设商品都保存在 P001 和 P002 这 2 个仓库中。
CREATE TABLE tbl_inventory_product
(inventory_id CHAR(4) NOT NULL,
product_id CHAR(4) NOT NULL,
inventory_quantity INTEGER NOT NULL,
PRIMARY KEY (inventory_id, product_id)
);
--- DML:插入数据
START TRANSACTION;
INSERT INTO tbl_inventory_product VALUES ('P001', '0001', 0);
INSERT INTO tbl_inventory_product VALUES ('P001', '0002', 120);
INSERT INTO tbl_inventory_product VALUES ('P001', '0003', 200);
INSERT INTO tbl_inventory_product VALUES ('P001', '0004', 3);
INSERT INTO tbl_inventory_product VALUES ('P001', '0005', 0);
INSERT INTO tbl_inventory_product VALUES ('P001', '0006', 99);
INSERT INTO tbl_inventory_product VALUES ('P001', '0007', 999);
INSERT INTO tbl_inventory_product VALUES ('P001', '0008', 200);
INSERT INTO tbl_inventory_product VALUES ('P002', '0001', 10);
INSERT INTO tbl_inventory_product VALUES ('P002', '0002', 25);
INSERT INTO tbl_inventory_product VALUES ('P002', '0003', 34);
INSERT INTO tbl_inventory_product VALUES ('P002', '0004', 19);
INSERT INTO tbl_inventory_product VALUES ('P002', '0005', 99);
INSERT INTO tbl_inventory_product VALUES ('P002', '0006', 0);
INSERT INTO tbl_inventory_product VALUES ('P002', '0007', 0 );
INSERT INTO tbl_inventory_product VALUES ('P002', '0008', 18);
COMMIT;
select * from tbl_inventory_product;
+--------------+------------+--------------------+
| inventory_id | product_id | inventory_quantity |
+--------------+------------+--------------------+
| P001 | 0001 | 0 |
| P001 | 0002 | 120 |
| P001 | 0003 | 200 |
| P001 | 0004 | 3 |
| P001 | 0005 | 0 |
| P001 | 0006 | 99 |
| P001 | 0007 | 999 |
| P001 | 0008 | 200 |
| P002 | 0001 | 10 |
| P002 | 0002 | 25 |
| P002 | 0003 | 34 |
| P002 | 0004 | 19 |
| P002 | 0005 | 99 |
| P002 | 0006 | 0 |
| P002 | 0007 | 0 |
| P002 | 0008 | 18 |
+--------------+------------+--------------------+
接下来,根据 tbl_inventory_product 表、tbl_shop_product 表、 tbl_product 表,使用内连接找出每个商店都有那些商品,每种商品的库存总量分别是多少。
select sp.shop_id, sp.shop_name, p.product_id, p.product_name, ip.inventory_id, ip.inventory_quantity
from tbl_shop_product as sp
inner join tbl_product as p
on sp.product_id = p.product_id
inner join tbl_inventory_product as ip
on sp.product_id = ip.product_id
order by shop_id;
+---------+-----------+------------+----------------+--------------+--------------------+
| shop_id | shop_name | product_id | product_name | inventory_id | inventory_quantity |
+---------+-----------+------------+----------------+--------------+--------------------+
| 000A | Tokyo | 0001 | T-shirt | P001 | 0 |
| 000A | Tokyo | 0002 | puncher | P001 | 120 |
| 000A | Tokyo | 0003 | sports T-shirt | P001 | 200 |
| 000A | Tokyo | 0003 | sports T-shirt | P002 | 34 |
| 000A | Tokyo | 0002 | puncher | P002 | 25 |
| 000A | Tokyo | 0001 | T-shirt | P002 | 10 |
| 000B | Nagoya | 0007 | grater | P002 | 0 |
| 000B | Nagoya | 0006 | fork | P002 | 0 |
| 000B | Nagoya | 0004 | kitchen knife | P002 | 19 |
| 000B | Nagoya | 0003 | sports T-shirt | P002 | 34 |
| 000B | Nagoya | 0002 | puncher | P002 | 25 |
| 000B | Nagoya | 0007 | grater | P001 | 999 |
| 000B | Nagoya | 0006 | fork | P001 | 99 |
| 000B | Nagoya | 0004 | kitchen knife | P001 | 3 |
| 000B | Nagoya | 0003 | sports T-shirt | P001 | 200 |
| 000B | Nagoya | 0002 | puncher | P001 | 120 |
| 000C | Osaka | 0003 | sports T-shirt | P002 | 34 |
| 000C | Osaka | 0004 | kitchen knife | P001 | 3 |
| 000C | Osaka | 0004 | kitchen knife | P002 | 19 |
| 000C | Osaka | 0003 | sports T-shirt | P001 | 200 |
+---------+-----------+------------+----------------+--------------+--------------------+
20 rows in set (0.00 sec)
连结第三张表的时候,也是通过 ON 子句指定连结条件,由于 tbl_product 表和 tbl_shop_product 表已经进行了连结,因此就无需再对 tbl_product 表和 tbl_inventory_product 表进行连结了。
多表进行外连结
正如之前所学发现的, 外连结一般能比内连结有更多的行, 从而能够比内连结给出更多关于主表的信息, 多表连结的时候使用外连结也有同样的作用。
例如:
select p.product_id, p.product_name, sp.shop_id, sp.shop_name, ip.inventory_id, ip.inventory_quantity
from tbl_product as p
left outer join tbl_shop_product as sp
on sp.product_id = p.product_id
left outer join tbl_inventory_product as ip
on sp.product_id = ip.product_id
order by shop_id;
+------------+-----------------+---------+-----------+--------------+--------------------+
| product_id | product_name | shop_id | shop_name | inventory_id | inventory_quantity |
+------------+-----------------+---------+-----------+--------------+--------------------+
| 0008 | ballpoint | NULL | NULL | NULL | NULL |
| 0005 | pressure cooker | NULL | NULL | NULL | NULL |
| 0003 | sports T-shirt | 000A | Tokyo | P001 | 200 |
| 0001 | T-shirt | 000A | Tokyo | P002 | 10 |
| 0003 | sports T-shirt | 000A | Tokyo | P002 | 34 |
| 0001 | T-shirt | 000A | Tokyo | P001 | 0 |
| 0002 | puncher | 000A | Tokyo | P002 | 25 |
| 0002 | puncher | 000A | Tokyo | P001 | 120 |
| 0007 | grater | 000B | Nagoya | P001 | 999 |
| 0003 | sports T-shirt | 000B | Nagoya | P002 | 34 |
| 0003 | sports T-shirt | 000B | Nagoya | P001 | 200 |
| 0002 | puncher | 000B | Nagoya | P001 | 120 |
| 0007 | grater | 000B | Nagoya | P002 | 0 |
| 0004 | kitchen knife | 000B | Nagoya | P002 | 19 |
| 0004 | kitchen knife | 000B | Nagoya | P001 | 3 |
| 0002 | puncher | 000B | Nagoya | P002 | 25 |
| 0006 | fork | 000B | Nagoya | P002 | 0 |
| 0006 | fork | 000B | Nagoya | P001 | 99 |
| 0004 | kitchen knife | 000C | Osaka | P001 | 3 |
| 0004 | kitchen knife | 000C | Osaka | P002 | 19 |
| 0003 | sports T-shirt | 000C | Osaka | P001 | 200 |
| 0003 | sports T-shirt | 000C | Osaka | P002 | 34 |
+------------+-----------------+---------+-----------+--------------+--------------------+
22 rows in set (0.00 sec)
2.4 ON 子句进阶–非等值连结
非等值连结:使用比较运算符( <,<=,>,>=, BETWEEN )、谓词运算( LIKE, IN, NOT 等 )可以放在 ON 子句内作为连结条件的连接。
2.4.1 非等值自左连结(SELF JOIN)
使用非等值自左连结实现排名。
练习题: 希望对 tbl_product 表中的商品按照售价排名。
使用自左连结的思路是,对每一种商品,找出售价不低于它的所有商品,然后对售价不低于它的商品使用 COUNT 函数计数。例如,对于价格最高的商品。
SELECT product_id, product_name, sale_price, COUNT(p2_id) AS my_rank
FROM ( # 使用自左连结对每种商品 找出售价不低于它的商品
SELECT P1.product_id,
P1.product_name,
P1.sale_price,
P2.product_id AS P2_id,
P2.product_name AS P2_name,
P2.sale_price AS P2_price
FROM tbl_product AS P1
LEFT OUTER JOIN tbl_product AS P2
ON P1.sale_price <= P2.sale_price
) AS X
GROUP BY product_id, product_name, sale_price
ORDER BY my_rank;
+------------+-----------------+------------+---------+
| product_id | product_name | sale_price | my_rank |
+------------+-----------------+------------+---------+
| 0005 | pressure cooker | 6800 | 1 |
| 0003 | sports T-shirt | 4000 | 2 |
| 0004 | kitchen knife | 3000 | 3 |
| 0001 | T-shirt | 1000 | 4 |
| 0007 | grater | 880 | 5 |
| 0002 | puncher | 500 | 7 |
| 0006 | fork | 500 | 7 |
| 0008 | ballpoint | 100 | 8 |
+------------+-----------------+------------+---------+
注 1:COUNT 函数的参数是列名时,会忽略该列中的缺失值,参数为 * 时则不忽略缺失值。
注 2:上述排名方案存在一些问题,如果两个商品的价格相等,则会导致两个商品的排名错误,例如, 叉子和打孔器的排名应该都是第六,但上述查询导致二者排名都是第七。试修改上述查询使得二者的排名均为第六。
注 3:其实排名有专门的函数,MySQL 8.0 新增加的窗口函数,但在较低版本的 MySQL 中只能使用上述自左连结的思路。
练习题: 请按照商品的售价从低到高,使用非等值自左连结对售价进行累计求和。
[注:这个案例缺少实际意义,并且由于有两种商品价格相同导致了不必要的复杂度,但示例数据库的表结构比较简单,暂时未想出有实际意义的例题]
首先,对每种商品使用自左连结,找出比该商品售价价格更低或相等的商品。
select p1.product_id as p1_id, p1.product_name as p1_name, p1.sale_price as p1_sale,
p2.product_id as p2_id, p2.product_name as p2_name, p2.sale_price as p2_sale
from tbl_product as p1
left outer join tbl_product as p2
on p1.sale_price >= p2.sale_price;
+-------+-----------------+---------+-------+-----------------+---------+
| p1_id | p1_name | p1_sale | p2_id | p2_name | p2_sale |
+-------+-----------------+---------+-------+-----------------+---------+
| 0001 | T-shirt | 1000 | 0008 | ballpoint | 100 |
| 0001 | T-shirt | 1000 | 0007 | grater | 880 |
| 0001 | T-shirt | 1000 | 0006 | fork | 500 |
| 0001 | T-shirt | 1000 | 0002 | puncher | 500 |
| 0001 | T-shirt | 1000 | 0001 | T-shirt | 1000 |
+-------+-----------------+---------+-------+-----------------+---------+
| 0002 | puncher | 500 | 0008 | ballpoint | 100 |
| 0002 | puncher | 500 | 0006 | fork | 500 |
| 0002 | puncher | 500 | 0002 | puncher | 500 |
+-------+-----------------+---------+-------+-----------------+---------+
| 0003 | sports T-shirt | 4000 | 0008 | ballpoint | 100 |
| 0003 | sports T-shirt | 4000 | 0007 | grater | 880 |
| 0003 | sports T-shirt | 4000 | 0006 | fork | 500 |
| 0003 | sports T-shirt | 4000 | 0004 | kitchen knife | 3000 |
| 0003 | sports T-shirt | 4000 | 0003 | sports T-shirt | 4000 |
| 0003 | sports T-shirt | 4000 | 0002 | puncher | 500 |
| 0003 | sports T-shirt | 4000 | 0001 | T-shirt | 1000 |
+-------+-----------------+---------+-------+-----------------+---------+
| 0004 | kitchen knife | 3000 | 0008 | ballpoint | 100 |
| 0004 | kitchen knife | 3000 | 0007 | grater | 880 |
| 0004 | kitchen knife | 3000 | 0006 | fork | 500 |
| 0004 | kitchen knife | 3000 | 0004 | kitchen knife | 3000 |
| 0004 | kitchen knife | 3000 | 0002 | puncher | 500 |
| 0004 | kitchen knife | 3000 | 0001 | T-shirt | 1000 |
+-------+-----------------+---------+-------+-----------------+---------+
| 0005 | pressure cooker | 6800 | 0008 | ballpoint | 100 |
| 0005 | pressure cooker | 6800 | 0007 | grater | 880 |
| 0005 | pressure cooker | 6800 | 0006 | fork | 500 |
| 0005 | pressure cooker | 6800 | 0005 | pressure cooker | 6800 |
| 0005 | pressure cooker | 6800 | 0004 | kitchen knife | 3000 |
| 0005 | pressure cooker | 6800 | 0003 | sports T-shirt | 4000 |
| 0005 | pressure cooker | 6800 | 0002 | puncher | 500 |
| 0005 | pressure cooker | 6800 | 0001 | T-shirt | 1000 |
+-------+-----------------+---------+-------+-----------------+---------+
| 0006 | fork | 500 | 0008 | ballpoint | 100 |
| 0006 | fork | 500 | 0006 | fork | 500 |
| 0006 | fork | 500 | 0002 | puncher | 500 |
+-------+-----------------+---------+-------+-----------------+---------+
| 0007 | grater | 880 | 0008 | ballpoint | 100 |
| 0007 | grater | 880 | 0007 | grater | 880 |
| 0007 | grater | 880 | 0006 | fork | 500 |
| 0007 | grater | 880 | 0002 | puncher | 500 |
+-------+-----------------+---------+-------+-----------------+---------+
| 0008 | ballpoint | 100 | 0008 | ballpoint | 100 |
+-------+-----------------+---------+-------+-----------------+---------+
看起来似乎没什么问题。
下一步, 按照 p1.product_id 分组,对 p2_sale 求和:
select product_id, product_name, sale_price, sum(p2_sale) as cum_sale
from (
select p1.product_id, p1.product_name, p1.sale_price,
p2.product_name as p2_name, p2.sale_price as p2_sale
from tbl_product as p1
left outer join tbl_product as p2
on p1.sale_price >= p2.sale_price
) as x
group by product_id, product_name, sale_price;
+------------+-----------------+------------+----------+
| product_id | product_name | sale_price | cum_sale |
+------------+-----------------+------------+----------+
| 0001 | T-shirt | 1000 | 2980 |
| 0002 | puncher | 500 | 1100 |
| 0003 | sports T-shirt | 4000 | 9980 |
| 0004 | kitchen knife | 3000 | 5980 |
| 0005 | pressure cooker | 6800 | 16780 |
| 0006 | fork | 500 | 1100 |
| 0007 | grater | 880 | 1980 |
| 0008 | ballpoint | 100 | 100 |
+------------+-----------------+------------+----------+
观察上述查询结果发现,由于有两种商品的售价相同,在使用 >= 进行连结时,导致了累计求和错误,这是由于这两种商品售价相同导致的。因此实际上之前是不应该单独只用 >= 作为连结条件的。
考察我们建立自左连结的本意,是要找出满足:1. 比该商品售价更低的,或者是 2. 该种商品自身,以及 3. 如果 A 和 B 两种商品售价相等,则建立连结时, 如果 p1.A 和 p2.A,p2.B 建立了连接,则 p1.B 不再和 p2.A 建立连结,因此根据上述约束条件,利用 ID 的有序性,进一步将上述查询改写为:
select product_id, product_name, sale_price, sum(p2_sale) as cum_sale
from (
select p1.product_id, p1.product_name, p1.sale_price,
p2.product_id as p2_id, p2.product_name as p2_name, p2.sale_price as p2_sale
from tbl_product as p1
left outer join tbl_product as p2
on ((p1.sale_price > p2.sale_price)
or (p1.sale_price = p2.sale_price
and p1.product_id <= p2.product_id))
ORDER by p1.product_id, p1.sale_price
) as x
group by product_id, product_name, sale_price
ORDER BY sale_price, cum_sale;
+------------+-----------------+------------+----------+
| product_id | product_name | sale_price | cum_sale |
+------------+-----------------+------------+----------+
| 0008 | ballpoint | 100 | 100 |
| 0006 | fork | 500 | 600 |
| 0002 | puncher | 500 | 1100 |
| 0007 | grater | 880 | 1980 |
| 0001 | T-shirt | 1000 | 2980 |
| 0004 | kitchen knife | 3000 | 5980 |
| 0003 | sports T-shirt | 4000 | 9980 |
| 0005 | pressure cooker | 6800 | 16780 |
+------------+-----------------+------------+----------+
2.5 交叉连结 – CROSS JOIN (笛卡尔积)
无论是外连结内连结,一个共同的必备条件就是通过ON 子句指定连结条件。
如果在连结去掉 ON 子句,就变成了交叉连结( CROSS JOIN ),交叉连结又叫笛卡尔积。
笛卡尔积:两个集合做笛卡尔积,就是使用集合 A 中的每一个元素与集合 B 中的每一个元素组成一个有序的组合。
2.5.1 并、交和差 与 交叉连接 区别
- 并、交和差:在纵向上对表进行扩张或筛选限制等运算的,这要求表的列数及对应位置的列的数据类型 “ 相容 ”,所以这些运算并不会增加新的列。
- 交叉连接(笛卡尔积):在横向上对表进行扩张,即增加新的列,这一点和连结的功能是一致的。但因为没有了 ON 子句的限制,会对左表和右表的每一行进行组合,这经常会导致很多无意义的行出现在检索结果中。
交叉连结的语法
- 使用关键字
CROSS JOIN
显式交叉连结
SELECT sp.shop_id,
sp.shop_name,
sp.product_id,
p.product_name,
p.sale_price
FROM tbl_shop_product AS sp
CROSS JOIN tbl_product AS p;
进行交叉连结时无法使用内连结和外连结中所使用的 ON 子句,这是因为交叉连结是对两张表中的全部记录进行交叉组合,因此结果中的记录数通常是两张表中行数的乘积。本例中,因为 tbl_shop_product 表存在 10 条记录,tbl_product 表存在 8 条记录,所以结果中就包含了 13 × 8 = 80 条记录。
内连结是交叉连结的一部分,“ 内 ” 也可以理解为 “ 包含在交叉连结结果中的部分 ”。相反,外连结的 “ 外 ” 可以理解为 “ 交叉连结结果之外的部分 ”。
交叉连结没有应用到实际业务之中的原因有两个:
- 一是其结果没有实用价值
- 二是由于其结果行数太多,需要花费大量的运算时间和高性能设备的支持。
2.5.2 [扩展阅读] 连结与笛卡尔积的关系
-
笛卡尔积可以看作是特殊的连结(事实上笛卡尔积的语法也可以写作 CROSS JOIN),这种连结的 ON 子句是一个恒为真的谓词。
-
反过来,在对笛卡尔积进行适当的限制之后,也就得到了内连结和外连结。
例如,对于 tbl_shop_product 表和 tbl_product 表,首先建立笛卡尔乘积:
SELECT sp.*, p.*
FROM tbl_shop_product AS sp
CROSS JOIN tbl_product AS p;
结果集有80行记录。

然后对上述笛卡尔乘积增加筛选条件 sp.product_id=p.product_id
,就得到了和内连结一致的结果:
SELECT sp.*, p.*
FROM tbl_shop_product AS sp
CROSS JOIN tbl_product AS p
WHERE sp.product_id = p.product_id;

将 CROSS JOIN 改为逗号,其实是内连结的 SQL 92 写法。对应的 SQL 99 写法是:INNER JION … ON …将连结条件从 WHERE 后面放到了 ON后面。这样概念更清晰。
SELECT sp.*, p.*
FROM tbl_shop_product AS sp, tbl_product AS p
WHERE sp.product_id = p.product_id;
2.6 连结的特定语法和过时语法
在笛卡尔积的基础上, 我们增加一个 WHERE 子句, 将之前的连结条件作为筛选条件加进去, 我们会发现, 得到的结果恰好是直接使用内连接的结果。
试执行以下查询, 并将查询结果与 SQL 99 的内连结结果做对比。
SELECT sp.shop_id, sp.shop_name,
p.product_id, p.product_name, p.sale_price
FROM tbl_shop_product AS sp
CROSS JOIN tbl_product AS p
WHERE sp.product_id = p.product_id
AND sp.shop_id = '000A';
+---------+-----------+------------+----------------+------------+
| shop_id | shop_name | product_id | product_name | sale_price |
+---------+-----------+------------+----------------+------------+
| 000A | Tokyo | 0001 | T-shirt | 1000 |
| 000A | Tokyo | 0002 | puncher | 500 |
| 000A | Tokyo | 0003 | sports T-shirt | 4000 |
+---------+-----------+------------+----------------+------------+
SELECT sp.shop_id, sp.shop_name,
p.product_id, p.product_name, p.sale_price
FROM tbl_shop_product AS sp
INNER JOIN tbl_product AS p
ON sp.product_id = p.product_id
WHERE sp.shop_id = '000A';
+---------+-----------+------------+----------------+------------+
| shop_id | shop_name | product_id | product_name | sale_price |
+---------+-----------+------------+----------------+------------+
| 000A | Tokyo | 0001 | T-shirt | 1000 |
| 000A | Tokyo | 0002 | puncher | 500 |
| 000A | Tokyo | 0003 | sports T-shirt | 4000 |
+---------+-----------+------------+----------------+------------+
发现结果是相同的。
使用过时语法的内连结(结果与代码清单 7-9 相同)
SELECT sp.shop_id, sp.shop_name,
p.product_id, p.product_name, p.sale_price
FROM tbl_shop_product AS sp
CROSS JOIN tbl_product AS p
WHERE sp.product_id = p.product_id
AND sp.shop_id = '000A';
+---------+-----------+------------+----------------+------------+
| shop_id | shop_name | product_id | product_name | sale_price |
+---------+-----------+------------+----------------+------------+
| 000A | Tokyo | 0001 | T-shirt | 1000 |
| 000A | Tokyo | 0002 | puncher | 500 |
| 000A | Tokyo | 0003 | sports T-shirt | 4000 |
+---------+-----------+------------+----------------+------------+
不推荐使用,理由有三点:
-
第一,使用这样的语法无法马上判断出到底是内连结还是外连结(又或者是其他种类的连结)。
-
第二,由于连结条件都写在 WHERE 子句之中,因此无法在短时间内分辨出哪部分是连结条件,哪部分是用来选取记录的限制条件。
-
第三,我们不知道这样的语法到底还能使用多久。每个 DBMS 的开发者都会考虑放弃过时的语法,转而支持新的语法。虽然并不是马上就不能使用了,但那一天总会到来的。
练习题
1. 找出 tbl_product 和 tbl_product2 中售价高于 500 的商品的基本信息。
select product_name, product_type, sale_price
from tbl_product
where sale_price > 500
union
select product_name, product_type, sale_price
from tbl_product2
where sale_price > 500;
+-----------------+--------------+------------+
| product_name | product_type | sale_price |
+-----------------+--------------+------------+
| T-shirt | clothes | 1000 |
| sports T-shirt | clothes | 4000 |
| kitchen knife | kitchenware | 3000 |
| pressure cooker | kitchenware | 6800 |
| grater | kitchenware | 880 |
| gloves | clothes | 800 |
| kettle | kitchenware | 2000 |
+-----------------+--------------+------------+
2. 借助对称差的实现方式,求 tbl_product 和 tbl_product2 的交集。
对称差: ( A − B ) ∪ ( B − A ) (A - B) \cup (B - A) (A−B)∪(B−A)
交集: A ∩ B = A ∪ B − ( A − B ) ∪ ( B − A ) A \cap B = A \cup B - (A - B) \cup (B - A) A∩B=A∪B−(A−B)∪(B−A)
# 计算 对称差
select product_id, product_name, product_type, sale_price
from tbl_product
where product_id not in (
select product_id
from tbl_product2
)
union
select product_id, product_name, product_type, sale_price
from tbl_product2
where product_id not in (
select product_id
from tbl_product
);
+------------+-----------------+-----------------+------------+
| product_id | product_name | product_type | sale_price |
+------------+-----------------+-----------------+------------+
| 0004 | kitchen knife | kitchenware | 3000 |
| 0005 | pressure cooker | kitchenware | 6800 |
| 0006 | fork | kitchenware | 500 |
| 0007 | grater | kitchenware | 880 |
| 0008 | ballpoint | Office supplies | 100 |
| 0009 | gloves | clothes | 800 |
| 0010 | kettle | kitchenware | 2000 |
+------------+-----------------+-----------------+------------+
顺便练习一下交运算的其他写法。
# 在对称差的基础上 计算 交集
select p.*
from (
select product_id, product_name, product_type, sale_price
from tbl_product
union
select product_id, product_name, product_type, sale_price
from tbl_product2
) as p
where p.product_id not in (
select product_id from tbl_product
where product_id not in (
select product_id
from tbl_product2
)
union
select product_id from tbl_product2
where product_id not in (
select product_id
from tbl_product
)
);
+------------+----------------+-----------------+------------+
| product_id | product_name | product_type | sale_price |
+------------+----------------+-----------------+------------+
| 0001 | T-shirt | clothes | 1000 |
| 0002 | puncher | Office supplies | 500 |
| 0003 | sports T-shirt | clothes | 4000 |
+------------+----------------+-----------------+------------+
# inner join 实现交运算
select p1.product_id, p1.product_name, p1.product_type, p1.sale_price
from tbl_product as p1
inner join tbl_product2 as p2
on p1.product_id = p2.product_id;
+------------+----------------+-----------------+------------+
| product_id | product_name | product_type | sale_price |
+------------+----------------+-----------------+------------+
| 0001 | T-shirt | clothes | 1000 |
| 0002 | puncher | Office supplies | 500 |
| 0003 | sports T-shirt | clothes | 4000 |
+------------+----------------+-----------------+------------+
# in 实现交运算
select product_id, product_name, product_type, sale_price
from tbl_product
where product_id in (
select product_id
from tbl_product2
);
+------------+----------------+-----------------+------------+
| product_id | product_name | product_type | sale_price |
+------------+----------------+-----------------+------------+
| 0001 | T-shirt | clothes | 1000 |
| 0002 | puncher | Office supplies | 500 |
| 0003 | sports T-shirt | clothes | 4000 |
+------------+----------------+-----------------+------------+
3. 每类商品中售价最高的商品都在哪些商店有售?
# 第一步:自联结 查询 每类商品中售价最高商品的信息
select
p1.product_id, p1.product_name, p1.product_type, p1.sale_price
from tbl_product as p1
inner join (
select product_type, max(sale_price) as max_sale_price
from tbl_product
group by product_type) as p2
on p1.product_type = p2.product_type
where p1.sale_price = p2.max_sale_price;
+------------+-----------------+-----------------+------------+
| product_id | product_name | product_type | sale_price |
+------------+-----------------+-----------------+------------+
| 0002 | puncher | Office supplies | 500 |
| 0003 | sports T-shirt | clothes | 4000 |
| 0005 | pressure cooker | kitchenware | 6800 |
+------------+-----------------+-----------------+------------+
# 第二步 外连结 查询 商店信息
select sp.shop_name, t.product_name, t.product_type, t.sale_price
from tbl_shop_product as sp
right outer join (
select
p1.product_id, p1.product_name, p1.product_type, p1.sale_price
from tbl_product as p1
inner join (
select product_type, max(sale_price) as max_sale_price
from tbl_product
group by product_type) as p2
on p1.product_type = p2.product_type
where p1.sale_price = p2.max_sale_price
) as t
on sp.product_id = t.product_id
order by sale_price desc;
+-----------+-----------------+-----------------+------------+
| shop_name | product_name | product_type | sale_price |
+-----------+-----------------+-----------------+------------+
| NULL | pressure cooker | kitchenware | 6800 |
| Osaka | sports T-shirt | clothes | 4000 |
| Nagoya | sports T-shirt | clothes | 4000 |
| Tokyo | sports T-shirt | clothes | 4000 |
| Nagoya | puncher | Office supplies | 500 |
| Tokyo | puncher | Office supplies | 500 |
+-----------+-----------------+-----------------+------------+
发现,kitchenware 类别中售价最高的 pressure cooker,没有商店在售。
4. 分别使用内连结和关联子查询每一类商品中售价最高的商品。
# 关联子查询
select product_type, product_name, sale_price
from tbl_product as p1
where sale_price = (
select max(sale_price)
from tbl_product as p2
where p1.product_type = p2.product_type
group by product_type
)
order by sale_price desc;
+-----------------+-----------------+------------+
| product_type | product_name | sale_price |
+-----------------+-----------------+------------+
| kitchenware | pressure cooker | 6800 |
| clothes | sports T-shirt | 4000 |
| Office supplies | puncher | 500 |
+-----------------+-----------------+------------+
# 内连结 第一步:两张表按 product_type 连接
select p1.product_type, p1.product_name, p1.sale_price, p2.max_sale_price
from tbl_product as p1
inner join (
select product_type, max(sale_price) as max_sale_price
from tbl_product
group by product_type
) as p2
on p1.product_type = p2.product_type
order by sale_price desc;
+-----------------+-----------------+------------+----------------+
| product_type | product_name | sale_price | max_sale_price |
+-----------------+-----------------+------------+----------------+
| kitchenware | pressure cooker | 6800 | 6800 |
| clothes | sports T-shirt | 4000 | 4000 |
| kitchenware | kitchen knife | 3000 | 6800 |
| clothes | T-shirt | 1000 | 4000 |
| kitchenware | grater | 880 | 6800 |
| Office supplies | puncher | 500 | 500 |
| kitchenware | fork | 500 | 6800 |
| Office supplies | ballpoint | 100 | 500 |
+-----------------+-----------------+------------+----------------+
# 内连结 第二步:第一步结果记录行太多了,所以需要添加一个 where 过滤条件:p1.sale_price = p2.max_sale_price
# 因为 p1.sale_price和p2.max_sale_price了,所以select后面两者中留其一即可
select p1.product_type, p1.product_name, p1.sale_price
from tbl_product as p1
inner join (
select product_type, max(sale_price) as max_sale_price
from tbl_product
group by product_type
) as p2
on p1.product_type = p2.product_type
where p1.sale_price = p2.max_sale_price
order by sale_price desc;
+-----------------+-----------------+------------+
| product_type | product_name | sale_price |
+-----------------+-----------------+------------+
| kitchenware | pressure cooker | 6800 |
| clothes | sports T-shirt | 4000 |
| Office supplies | puncher | 500 |
+-----------------+-----------------+------------+
5. 在 tbl_product 表中,取出 product_id, product_name, sale_price,并按照商品的售价从低到高进行排序、对售价进行累计求和。(用关联子查询实现)
select
t.product_id, t.product_name, t.sale_price, sum(t.p2_sal) as cum_sale_price
from (
select
p1.product_id, p1.product_name, p1.sale_price, p2.sale_price as p2_sal
from tbl_product p1
inner join tbl_product p2
on p1.sale_price > p2.sale_price
or (p1.sale_price = p2.sale_price and p1.product_id <= p2.product_id)
) as t
group by t.product_id, t.product_name, t.sale_price
order by sale_price, cum_sale_price;
+------------+-----------------+------------+----------------+
| product_id | product_name | sale_price | cum_sale_price |
+------------+-----------------+------------+----------------+
| 0008 | ballpoint | 100 | 100 |
| 0006 | fork | 500 | 600 |
| 0002 | puncher | 500 | 1100 |
| 0007 | grater | 880 | 1980 |
| 0001 | T-shirt | 1000 | 2980 |
| 0004 | kitchen knife | 3000 | 5980 |
| 0003 | sports T-shirt | 4000 | 9980 |
| 0005 | pressure cooker | 6800 | 16780 |
+------------+-----------------+------------+----------------+