摘要:
mysql/sql的查询优化器, 会将子查询中的嵌套join进行上拉,形成一个平坦的join列表.
这样形成一个平坦的join的列表, 便于两两之间逐个的join操作.
本文对其进行分析.
参考:
MySQL :: MySQL 8.0 Reference Manual :: 8.2.1.8 Nested Join Optimization
DML:
创建表:
CREATE TABLE `c1md_bank_acct` (
`ROW_ID` decimal(18,0) NOT NULL DEFAULT '-1' COMMENT 'ROW_ID',
`SYS_ID` decimal(18,0) NOT NULL DEFAULT '-1' COMMENT '系统ID',
`TENANT_ID` decimal(18,0) NOT NULL DEFAULT '-1' COMMENT '租户ID',
`COMPANY_ID` decimal(18,0) NOT NULL DEFAULT '-1' COMMENT '单位ID',
`ACCOUNT_CODE` varchar(100) NOT NULL COMMENT '银行账号',
`ACCOUNT_NAME` varchar(300) NOT NULL COMMENT '户名',
`ACCOUNT_NAME_EN` varchar(300) DEFAULT NULL COMMENT '户名.英文',
`ACCOUNT_NAME_ALIAS` varchar(300) DEFAULT NULL COMMENT '别名',
`BANK_ID` decimal(18,0) NOT NULL DEFAULT '-1' COMMENT '银行ID',
`BANK_CODE` varchar(32) DEFAULT NULL COMMENT '银行代码',
`BANK_NAME` varchar(300) DEFAULT NULL COMMENT '银行名称',
`HBANK_ID` decimal(18,0) NOT NULL DEFAULT '-1' COMMENT '联行号ID',
`HBANK_CODE` varchar(32) DEFAULT NULL COMMENT '联行号',
`HBANK_NAME` varchar(300) DEFAULT NULL COMMENT '开户行',
`DISTRICT_CODE` varchar(32) DEFAULT NULL COMMENT '地区编号',
`DISTRICT_NAME` varchar(200) DEFAULT NULL COMMENT '地区名称',
`PROVINCE_CODE` varchar(32) DEFAULT NULL COMMENT '省市代码',
`PROVINCE_NAME` varchar(200) DEFAULT NULL COMMENT '省市名称',
`BANK_INCODE` varchar(100) DEFAULT NULL COMMENT '内部机构号',
`CURRENCY_ID` decimal(18,0) NOT NULL DEFAULT '-1' COMMENT '币种ID',
`CURRENCY_CODE` varchar(32) NOT NULL COMMENT '币种代码',
`CURRENCY_NAME` varchar(200) NOT NULL COMMENT '币种名称',
`TYPE_ID` decimal(18,0) NOT NULL DEFAULT '-1' COMMENT '类别ID',
`TYPE_CODE` varchar(32) DEFAULT NULL COMMENT '类别代码',
`TYPE_NAME` varchar(100) DEFAULT NULL COMMENT '类别名称',
`ACCT_CLASS` char(1) NOT NULL DEFAULT '0' COMMENT '账户性质:0-收支户;1-收入户;2-支出户;3-中心户',
`ACCT_FLAG` char(1) NOT NULL DEFAULT '0' COMMENT '账户状态:[1]-初始;[2]-启用;[3]-停用;[4]-销户',
`OPEN_DATE` datetime DEFAULT NULL COMMENT '开户日期',
`CANCEL_DATE` datetime DEFAULT NULL COMMENT '注销日期',
`INIT_FLAG` char(1) NOT NULL DEFAULT '0' COMMENT '指定期初余额标识:[0]-不指定;[1]-指定;',
`INIT_DATE` datetime DEFAULT NULL COMMENT '期初日期',
`INIT_BALANCE` decimal(16,2) NOT NULL DEFAULT '0.00' COMMENT '期初余额',
`INIT_STATUS` char(1) NOT NULL DEFAULT '0' COMMENT '期初状态:[0]-初始;[1]-已设置;',
`ONLINE_FLAG` char(1) NOT NULL DEFAULT '0' COMMENT '上线状态标志,[0]-离线;[1]-上线',
`ONLINE_NAME` varchar(200) DEFAULT NULL COMMENT '上线状态标志,[0]-离线;[1]-上线',
`SIGNATURE` varchar(200) DEFAULT NULL COMMENT '开设账户时的预留印签(保留)',
`GL_ACCOUNT_ID` decimal(18,0) NOT NULL DEFAULT '-1' COMMENT '会计科目ID(保留)',
`GL_ACCOUNT_CODE` varchar(200) DEFAULT NULL COMMENT '总账科目代码(保留)',
`GL_ACCOUNT_NAME` varchar(200) DEFAULT NULL COMMENT '总账科目名称(保留)',
`DEFAULT_PAY_FLAG` char(1) DEFAULT 'N' COMMENT '默认付款账户标志,[Y]-是;[N]-否。',
`REMARK` varchar(255) DEFAULT NULL COMMENT '备注',
`RESERVER1` decimal(18,2) NOT NULL DEFAULT '0.00' COMMENT '数值型预留字段',
`RESERVER2` varchar(32) DEFAULT NULL COMMENT '短字符预留字段',
`RESERVER3` varchar(200) DEFAULT NULL COMMENT '长字符预留字段',
`DELETED_FLAG` char(1) NOT NULL DEFAULT '0' COMMENT '记录删除标志 [0]-未删除;[1]-逻辑删除',
`ORIGIN_FLAG` char(1) DEFAULT NULL COMMENT '数据来源的标志:[]或[I]-(Input)系统录入;[O]-(Out)外部接口导入;[S]-(System)系统保留。本标志不能挪为它用。',
`ORIGIN_APP` varchar(32) DEFAULT NULL COMMENT '数据来源应用的代码',
`CREATED_BY` decimal(18,0) NOT NULL DEFAULT '-1' COMMENT '记录创建人ID',
`CREATED_DATE` datetime DEFAULT NULL COMMENT '记录创建时间',
`LAST_UPD_BY` decimal(18,0) NOT NULL DEFAULT '-1' COMMENT '记录最近修改人ID',
`LAST_UPD_DATE` datetime DEFAULT NULL COMMENT '记录最近修改日期',
`MODIFICATION_NUM` decimal(8,0) NOT NULL DEFAULT '0' COMMENT '记录修改次数',
PRIMARY KEY (`ROW_ID`),
KEY `IDX_MDACCT02` (`TENANT_ID`,`ACCOUNT_CODE`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='银行账户';
CREATE TABLE `c1am_acct_day` (
`ROW_ID` decimal(18,0) NOT NULL DEFAULT '-1' COMMENT 'ROW_ID',
`SYS_ID` decimal(18,0) NOT NULL DEFAULT '-1' COMMENT '系统ID',
`TENANT_ID` decimal(18,0) NOT NULL DEFAULT '-1' COMMENT '租户ID',
`ACCOUNT_ID` decimal(18,0) NOT NULL DEFAULT '-1' COMMENT '账户ID',
`ACCOUNT_CODE` varchar(100) DEFAULT NULL COMMENT '账号',
`ACCOUNT_NAME` varchar(300) DEFAULT NULL COMMENT '账号户名',
`BANK_ID` decimal(18,0) NOT NULL DEFAULT '-1' COMMENT '银行ID',
`BANK_CODE` varchar(32) DEFAULT NULL COMMENT '银行代码',
`BANK_NAME` varchar(300) DEFAULT NULL COMMENT '银行名称',
`HBANK_CODE` varchar(32) DEFAULT NULL COMMENT '开户行行号',
`HBANK_NAME` varchar(300) DEFAULT NULL COMMENT '开户行名称',
`CURRENCY_ID` decimal(18,0) NOT NULL DEFAULT '-1' COMMENT '币种ID',
`CURRENCY_CODE` varchar(32) NOT NULL COMMENT '币种代码',
`CURRENCY_NAME` varchar(200) NOT NULL COMMENT '币种名称',
`FISCAL_DATE` date DEFAULT NULL COMMENT '记账日期',
`BALANCE` decimal(16,2) NOT NULL DEFAULT '0.00' COMMENT '余额',
`BALANCE_FLAG` char(1) NOT NULL DEFAULT 'B' COMMENT '余额方向。[D]-借;[C]-贷;[B]-平。默认[B]',
`CAL_TYPE` char(1) NOT NULL COMMENT '计算方式;1-历史明细最后一笔的余额;2-手工设置;3-期初叠加历史明细发生额;4-实时查询;5-历史余额接口',
`REMARK` varchar(1000) DEFAULT NULL COMMENT '备注',
`RESERVER1` decimal(18,2) NOT NULL DEFAULT '0.00' COMMENT '数值型预留字段',
`RESERVER2` varchar(32) DEFAULT NULL COMMENT '短字符预留字段',
`RESERVER3` varchar(200) DEFAULT NULL COMMENT '长字符预留字段',
`DELETED_FLAG` char(1) NOT NULL DEFAULT '0' COMMENT '记录删除标志 [0]-未删除;[1]-逻辑删除',
`ORIGIN_FLAG` char(1) DEFAULT 'I' COMMENT '数据来源的标志:[]或[I]-(Input)系统录入;[O]-(Out)外部接口导入;[S]-(System)系统保留。本标志不能挪为它用。',
`ORIGIN_APP` varchar(32) DEFAULT NULL COMMENT '数据来源应用的代码',
`CREATED_BY` decimal(18,0) NOT NULL DEFAULT '-1' COMMENT '记录创建人ID',
`CREATED_DATE` datetime DEFAULT NULL COMMENT '记录创建时间',
`LAST_UPD_BY` decimal(18,0) NOT NULL DEFAULT '-1' COMMENT '记录最近修改人ID',
`LAST_UPD_DATE` datetime DEFAULT NULL COMMENT '记录最近修改日期',
`MODIFICATION_NUM` decimal(8,0) NOT NULL DEFAULT '0' COMMENT '记录修改次数',
PRIMARY KEY (`ROW_ID`),
UNIQUE KEY `IDX_AMACCTDAY02` (`ACCOUNT_ID`,`FISCAL_DATE`),
KEY `IDX_AMACCTDAY01` (`TENANT_ID`),
KEY `IDX_AMACCTDAY03` (`ACCOUNT_ID`),
KEY `IDX_AMACCTDAY04` (`FISCAL_DATE`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='账户每日余额';
原始的查询SQL
SELECT
a.*
FROM
c1md_bank_acct a
LEFT JOIN (
SELECT xx.account_id,
xx.fiscal_date,
xx.balance
FROM
(
SELECT account_id,
MAX(fiscal_date) fiscal_date
FROM
c1am_acct_day
WHERE
deleted_flag = '0'
GROUP BY
account_id) x
JOIN c1am_acct_day xx ON
x.account_id = xx.account_id
AND x.fiscal_date = xx.fiscal_date
WHERE
xx.deleted_flag = '0') c ON
a.row_id = c.account_id;
查询优化器改写后的查询SQL
SELECT
a.*
FROM
c1md_bank_acct a
LEFT JOIN (
SELECT account_id,
MAX(fiscal_date) fiscal_date
FROM
c1am_acct_day
WHERE
deleted_flag = '0'
GROUP BY
account_id) x ON
a.row_id = x.account_id
LEFT JOIN c1am_acct_day xx ON
x.account_id = xx.account_id
AND x.fiscal_date = xx.fiscal_date
AND xx.deleted_flag = '0';
需要思考的地方:
一. 子查询中的xx表是inner jon, 改写完上拉xx表后, 作为left join
- 关键在于谓词的所处的位置
- 当谓词位于join的on位置时, 会转换成left join
- 当谓词位于join后的where位置时, 会转换成inner join
二. 能决定将嵌套join表进行上拉的条件谓词
- 存在最外表的表, 与嵌套的join表, 进行连接的条件
- 表现为:
-
a.row_id = c.account_id
-
x.account_id = xx.account_id
-
执行嵌套join平坦的处理
SELECT_LEX::simplify_joins
/**
Simplify joins replacing outer joins by inner joins whenever it's
possible.
The function, during a retrieval of join_list, eliminates those
outer joins that can be converted into inner join, possibly nested.
It also moves the join conditions for the converted outer joins
and from inner joins to conds.
The function also calculates some attributes for nested joins:
-# used_tables
-# not_null_tables
-# dep_tables.
-# on_expr_dep_tables
The first two attributes are used to test whether an outer join can
be substituted by an inner join. The third attribute represents the
relation 'to be dependent on' for tables. If table t2 is dependent
on table t1, then in any evaluated execution plan table access to
table t2 must precede access to table t2. This relation is used also
to check whether the query contains invalid cross-references.
The fourth attribute is an auxiliary one and is used to calculate
dep_tables.
As the attribute dep_tables qualifies possibles orders of tables in the
execution plan, the dependencies required by the straight join
modifiers are reflected in this attribute as well.
The function also removes all parentheses that can be removed from the join
expression without changing its meaning.
@note
An outer join can be replaced by an inner join if the where condition
or the join condition for an embedding nested join contains a conjunctive
predicate rejecting null values for some attribute of the inner tables.
E.g. in the query:
@code
SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a WHERE t2.b < 5
@endcode
the predicate t2.b < 5 rejects nulls.
The query is converted first to:
@code
SELECT * FROM t1 INNER JOIN t2 ON t2.a=t1.a WHERE t2.b < 5
@endcode
then to the equivalent form:
@code
SELECT * FROM t1, t2 ON t2.a=t1.a WHERE t2.b < 5 AND t2.a=t1.a
@endcode
Similarly the following query:
@code
SELECT * from t1 LEFT JOIN (t2, t3) ON t2.a=t1.a t3.b=t1.b
WHERE t2.c < 5
@endcode
is converted to:
@code
SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a t3.b=t1.b
@endcode
One conversion might trigger another:
@code
SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a
LEFT JOIN t3 ON t3.b=t2.b
WHERE t3 IS NOT NULL =>
SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a, t3
WHERE t3 IS NOT NULL AND t3.b=t2.b =>
SELECT * FROM t1, t2, t3
WHERE t3 IS NOT NULL AND t3.b=t2.b AND t2.a=t1.a
@endcode
The function removes all unnecessary parentheses from the expression
produced by the conversions.
E.g.
@code
SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
@endcode
finally is converted to:
@code
SELECT * FROM t1, t2, t3 WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
@endcode
It also will remove parentheses from the following queries:
@code
SELECT * from (t1 LEFT JOIN t2 ON t2.a=t1.a) LEFT JOIN t3 ON t3.b=t2.b
SELECT * from (t1, (t2,t3)) WHERE t1.a=t2.a AND t2.b=t3.b.
@endcode
The benefit of this simplification procedure is that it might return
a query for which the optimizer can evaluate execution plans with more
join orders. With a left join operation the optimizer does not
consider any plan where one of the inner tables is before some of outer
tables.
IMPLEMENTATION
The function is implemented by a recursive procedure. On the recursive
ascent all attributes are calculated, all outer joins that can be
converted are replaced and then all unnecessary parentheses are removed.
As join list contains join tables in the reverse order sequential
elimination of outer joins does not require extra recursive calls.
SEMI-JOIN NOTES
Remove all semi-joins that have are within another semi-join (i.e. have
an "ancestor" semi-join nest)
EXAMPLES
Here is an example of a join query with invalid cross references:
@code
SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t3.a LEFT JOIN t3 ON t3.b=t1.b
@endcode
@param thd thread handler
@param join_list list representation of the join to be converted
@param top true <=> cond is the where condition
@param in_sj TRUE <=> processing semi-join nest's children
@param[in,out] cond In: condition to which the join condition for converted
outer joins is to be added;
Out: new condition
@param changelog Don't specify this parameter, it is reserved for
recursive calls inside this function
@returns true for error, false for success
*/
bool
SELECT_LEX::simplify_joins(THD *thd,
List<TABLE_LIST> *join_list, bool top,
bool in_sj, Item **cond, uint *changelog)
{
/*
Each type of change done by this function, or its recursive calls, is
tracked in a bitmap:
*/
enum change
{
NONE= 0,
OUTER_JOIN_TO_INNER= 1 << 0,
JOIN_COND_TO_WHERE= 1 << 1,
PAREN_REMOVAL= 1 << 2,
SEMIJOIN= 1 << 3
};
uint changes= 0; // To keep track of changes.
if (changelog == NULL) // This is the top call.
changelog= &changes;
TABLE_LIST *table;
NESTED_JOIN *nested_join;
TABLE_LIST *prev_table= 0;
List_iterator<TABLE_LIST> li(*join_list);
const bool straight_join= active_options() & SELECT_STRAIGHT_JOIN;
DBUG_ENTER("simplify_joins");
/*
Try to simplify join operations from join_list.
The most outer join operation is checked for conversion first.
*/
while ((table= li++))
{
table_map used_tables;
table_map not_null_tables= (table_map) 0;
if ((nested_join= table->nested_join))
{
/*
If the element of join_list is a nested join apply
the procedure to its nested join list first.
*/
if (table->join_cond())
{
Item *join_cond= table->join_cond();
/*
If a join condition JC is attached to the table,
check all null rejected predicates in this condition.
If such a predicate over an attribute belonging to
an inner table of an embedded outer join is found,
the outer join is converted to an inner join and
the corresponding join condition is added to JC.
*/
if (simplify_joins(thd, &nested_join->join_list,
false, in_sj || table->sj_cond(),
&join_cond, changelog))
DBUG_RETURN(true);
if (join_cond != table->join_cond())
{
assert(join_cond);
table->set_join_cond(join_cond);
}
}
nested_join->used_tables= (table_map) 0;
nested_join->not_null_tables=(table_map) 0;
if (simplify_joins(thd, &nested_join->join_list, top,
in_sj || table->sj_cond(), cond, changelog))
DBUG_RETURN(true);
used_tables= nested_join->used_tables;
not_null_tables= nested_join->not_null_tables;
}
else
{
used_tables= table->map();
if (*cond)
not_null_tables= (*cond)->not_null_tables();
}
if (table->embedding)
{
table->embedding->nested_join->used_tables|= used_tables;
table->embedding->nested_join->not_null_tables|= not_null_tables;
}
if (!table->outer_join || (used_tables & not_null_tables))
{
/*
For some of the inner tables there are conjunctive predicates
that reject nulls => the outer join can be replaced by an inner join.
*/
if (table->outer_join)
{
*changelog|= OUTER_JOIN_TO_INNER;
table->outer_join= 0;
}
if (table->join_cond())
{
*changelog|= JOIN_COND_TO_WHERE;
/* Add join condition to the WHERE or upper-level join condition. */
if (*cond)
{
Item_cond_and *new_cond=
static_cast<Item_cond_and*>(and_conds(*cond, table->join_cond()));
if (!new_cond)
DBUG_RETURN(true);
new_cond->top_level_item();
/*
It is always a new item as both the upper-level condition and a
join condition existed
*/
assert(!new_cond->fixed);
if (new_cond->fix_fields(thd, NULL))
DBUG_RETURN(true);
/* If join condition has a pending rollback in THD::change_list */
List_iterator<Item> lit(*new_cond->argument_list());
Item *arg;
while ((arg= lit++))
{
/*
Check whether the arguments to AND need substitution
of rollback location.
*/
thd->replace_rollback_place(lit.ref());
}
*cond= new_cond;
}
else
{
*cond= table->join_cond();
/* If join condition has a pending rollback in THD::change_list */
thd->replace_rollback_place(cond);
}
table->set_join_cond(NULL);
}
}
if (!top)
continue;
/*
Only inner tables of non-convertible outer joins remain with
the join condition.
*/
if (table->join_cond())
{
table->dep_tables|= table->join_cond()->used_tables();
// At this point the joined tables always have an embedding join nest:
assert(table->embedding);
table->dep_tables&= ~table->embedding->nested_join->used_tables;
// Embedding table depends on tables used in embedded join conditions.
table->embedding->on_expr_dep_tables|= table->join_cond()->used_tables();
}
if (prev_table)
{
/* The order of tables is reverse: prev_table follows table */
if (prev_table->straight || straight_join)
prev_table->dep_tables|= used_tables;
if (prev_table->join_cond())
{
prev_table->dep_tables|= table->on_expr_dep_tables;
table_map prev_used_tables= prev_table->nested_join ?
prev_table->nested_join->used_tables :
prev_table->map();
/*
If join condition contains only references to inner tables
we still make the inner tables dependent on the outer tables.
It would be enough to set dependency only on one outer table
for them. Yet this is really a rare case.
Note:
PSEUDO_TABLE_BITS mask should not be counted as it
prevents update of inner table dependencies.
For example it might happen if RAND()/COUNT(*) function
is used in JOIN ON clause.
*/
if (!((prev_table->join_cond()->used_tables() & ~PSEUDO_TABLE_BITS) &
~prev_used_tables))
prev_table->dep_tables|= used_tables;
}
}
prev_table= table;
}
/*
Flatten nested joins that can be flattened.
no join condition and not a semi-join => can be flattened.
*/
li.rewind();
while ((table= li++))
{
nested_join= table->nested_join;
if (table->sj_cond() && !in_sj)
{
/*
If this is a semi-join that is not contained within another semi-join,
leave it intact (otherwise it is flattened)
*/
*changelog|= SEMIJOIN;
}
else if (nested_join && !table->join_cond())
{
*changelog|= PAREN_REMOVAL;
TABLE_LIST *tbl;
List_iterator<TABLE_LIST> it(nested_join->join_list);
while ((tbl= it++))
{
tbl->embedding= table->embedding;
tbl->join_list= table->join_list;
tbl->dep_tables|= table->dep_tables;
}
li.replace(nested_join->join_list);
}
}
if (changes)
{
Opt_trace_context * trace= &thd->opt_trace;
if (unlikely(trace->is_started()))
{
Opt_trace_object trace_wrapper(trace);
Opt_trace_object trace_object(trace, "transformations_to_nested_joins");
{
Opt_trace_array trace_changes(trace, "transformations");
if (changes & SEMIJOIN)
trace_changes.add_alnum("semijoin");
if (changes & OUTER_JOIN_TO_INNER)
trace_changes.add_alnum("outer_join_to_inner_join");
if (changes & JOIN_COND_TO_WHERE)
trace_changes.add_alnum("JOIN_condition_to_WHERE");
if (changes & PAREN_REMOVAL)
trace_changes.add_alnum("parenthesis_removal");
}
// the newly transformed query is worth printing
opt_trace_print_expanded_query(thd, this, &trace_object);
}
}
DBUG_RETURN(false);
}